Skip to content

Commit f04a58b

Browse files
authored
Merge pull request libgit2#4445 from tiennou/shallow/dry-commit-parsing
DRY commit parsing
2 parents 0ec0b2b + 5cf17e0 commit f04a58b

File tree

5 files changed

+76
-89
lines changed

5 files changed

+76
-89
lines changed

src/commit.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "message.h"
2121
#include "refs.h"
2222
#include "object.h"
23+
#include "array.h"
2324
#include "oidarray.h"
2425

2526
void git_commit__free(void *_commit)
@@ -383,24 +384,32 @@ int git_commit_amend(
383384
return error;
384385
}
385386

386-
int git_commit__parse_raw(void *_commit, const char *data, size_t size)
387+
static int commit_parse(git_commit *commit, const char *data, size_t size, unsigned int flags)
387388
{
388-
git_commit *commit = _commit;
389389
const char *buffer_start = data, *buffer;
390390
const char *buffer_end = buffer_start + size;
391391
git_oid parent_id;
392392
size_t header_len;
393393
git_signature dummy_sig;
394394

395+
assert(commit && data);
396+
395397
buffer = buffer_start;
396398

397399
/* Allocate for one, which will allow not to realloc 90% of the time */
398400
git_array_init_to_size(commit->parent_ids, 1);
399401
GIT_ERROR_CHECK_ARRAY(commit->parent_ids);
400402

401403
/* The tree is always the first field */
402-
if (git_oid__parse(&commit->tree_id, &buffer, buffer_end, "tree ") < 0)
403-
goto bad_buffer;
404+
if (!(flags & GIT_COMMIT_PARSE_QUICK)) {
405+
if (git_oid__parse(&commit->tree_id, &buffer, buffer_end, "tree ") < 0)
406+
goto bad_buffer;
407+
} else {
408+
size_t tree_len = strlen("tree ") + GIT_OID_HEXSZ + 1;
409+
if (buffer + tree_len > buffer_end)
410+
goto bad_buffer;
411+
buffer += tree_len;
412+
}
404413

405414
/*
406415
* TODO: commit grafts!
@@ -413,11 +422,13 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
413422
git_oid_cpy(new_id, &parent_id);
414423
}
415424

416-
commit->author = git__malloc(sizeof(git_signature));
417-
GIT_ERROR_CHECK_ALLOC(commit->author);
425+
if (!(flags & GIT_COMMIT_PARSE_QUICK)) {
426+
commit->author = git__malloc(sizeof(git_signature));
427+
GIT_ERROR_CHECK_ALLOC(commit->author);
418428

419-
if (git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n') < 0)
420-
return -1;
429+
if (git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n') < 0)
430+
return -1;
431+
}
421432

422433
/* Some tools create multiple author fields, ignore the extra ones */
423434
while (!git__prefixncmp(buffer, buffer_end - buffer, "author ")) {
@@ -435,6 +446,9 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
435446
if (git_signature__parse(commit->committer, &buffer, buffer_end, "committer ", '\n') < 0)
436447
return -1;
437448

449+
if (flags & GIT_COMMIT_PARSE_QUICK)
450+
return 0;
451+
438452
/* Parse add'l header entries */
439453
while (buffer < buffer_end) {
440454
const char *eoln = buffer;
@@ -477,11 +491,19 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
477491
return -1;
478492
}
479493

494+
int git_commit__parse_raw(void *commit, const char *data, size_t size)
495+
{
496+
return commit_parse(commit, data, size, 0);
497+
}
498+
499+
int git_commit__parse_ext(git_commit *commit, git_odb_object *odb_obj, unsigned int flags)
500+
{
501+
return commit_parse(commit, git_odb_object_data(odb_obj), git_odb_object_size(odb_obj), flags);
502+
}
503+
480504
int git_commit__parse(void *_commit, git_odb_object *odb_obj)
481505
{
482-
return git_commit__parse_raw(_commit,
483-
git_odb_object_data(odb_obj),
484-
git_odb_object_size(odb_obj));
506+
return git_commit__parse_ext(_commit, odb_obj, 0);
485507
}
486508

487509
#define GIT_COMMIT_GETTER(_rvalue, _name, _return) \

src/commit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,10 @@ void git_commit__free(void *commit);
3737
int git_commit__parse(void *commit, git_odb_object *obj);
3838
int git_commit__parse_raw(void *commit, const char *data, size_t size);
3939

40+
typedef enum {
41+
GIT_COMMIT_PARSE_QUICK = (1 << 0), /**< Only parse parents and committer info */
42+
} git_commit__parse_flags;
43+
44+
int git_commit__parse_ext(git_commit *commit, git_odb_object *odb_obj, unsigned int flags);
45+
4046
#endif

src/commit_list.c

Lines changed: 28 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "revwalk.h"
1111
#include "pool.h"
1212
#include "odb.h"
13+
#include "commit.h"
1314

1415
int git_commit_list_time_cmp(const void *a, const void *b)
1516
{
@@ -55,17 +56,6 @@ git_commit_list_node *git_commit_list_alloc_node(git_revwalk *walk)
5556
return (git_commit_list_node *)git_pool_mallocz(&walk->commit_pool, 1);
5657
}
5758

58-
static int commit_error(git_commit_list_node *commit, const char *msg)
59-
{
60-
char commit_oid[GIT_OID_HEXSZ + 1];
61-
git_oid_fmt(commit_oid, &commit->oid);
62-
commit_oid[GIT_OID_HEXSZ] = '\0';
63-
64-
git_error_set(GIT_ERROR_ODB, "failed to parse commit %s - %s", commit_oid, msg);
65-
66-
return -1;
67-
}
68-
6959
static git_commit_list_node **alloc_parents(
7060
git_revwalk *walk, git_commit_list_node *commit, size_t n_parents)
7161
{
@@ -111,77 +101,42 @@ git_commit_list_node *git_commit_list_pop(git_commit_list **stack)
111101

112102
static int commit_quick_parse(
113103
git_revwalk *walk,
114-
git_commit_list_node *commit,
115-
const uint8_t *buffer,
116-
size_t buffer_len)
104+
git_commit_list_node *node,
105+
git_odb_object *obj)
117106
{
118-
const size_t parent_len = strlen("parent ") + GIT_OID_HEXSZ + 1;
119-
const uint8_t *buffer_end = buffer + buffer_len;
120-
const uint8_t *parents_start, *committer_start;
121-
int i, parents = 0;
122-
int64_t commit_time;
123-
124-
buffer += strlen("tree ") + GIT_OID_HEXSZ + 1;
125-
126-
parents_start = buffer;
127-
while (buffer + parent_len < buffer_end && memcmp(buffer, "parent ", strlen("parent ")) == 0) {
128-
parents++;
129-
buffer += parent_len;
130-
}
131-
132-
commit->parents = alloc_parents(walk, commit, parents);
133-
GIT_ERROR_CHECK_ALLOC(commit->parents);
134-
135-
buffer = parents_start;
136-
for (i = 0; i < parents; ++i) {
137-
git_oid oid;
138-
139-
if (git_oid_fromstr(&oid, (const char *)buffer + strlen("parent ")) < 0)
140-
return -1;
107+
git_oid *parent_oid;
108+
git_commit *commit;
109+
int error;
110+
size_t i;
141111

142-
commit->parents[i] = git_revwalk__commit_lookup(walk, &oid);
143-
if (commit->parents[i] == NULL)
144-
return -1;
112+
commit = git__calloc(1, sizeof(*commit));
113+
GIT_ERROR_CHECK_ALLOC(commit);
114+
commit->object.repo = walk->repo;
145115

146-
buffer += parent_len;
116+
if ((error = git_commit__parse_ext(commit, obj, GIT_COMMIT_PARSE_QUICK)) < 0) {
117+
git__free(commit);
118+
return error;
147119
}
148120

149-
commit->out_degree = (unsigned short)parents;
150-
151-
if ((committer_start = buffer = memchr(buffer, '\n', buffer_end - buffer)) == NULL)
152-
return commit_error(commit, "object is corrupted");
153-
154-
buffer++;
155-
156-
if ((buffer = memchr(buffer, '\n', buffer_end - buffer)) == NULL)
157-
return commit_error(commit, "object is corrupted");
158-
159-
/* Skip trailing spaces */
160-
while (buffer > committer_start && git__isspace(*buffer))
161-
buffer--;
162-
163-
/* Seek for the beginning of the pack of digits */
164-
while (buffer > committer_start && git__isdigit(*buffer))
165-
buffer--;
166-
167-
/* Skip potential timezone offset */
168-
if ((buffer > committer_start) && (*buffer == '+' || *buffer == '-')) {
169-
buffer--;
121+
if (!git__is_uint16(git_array_size(commit->parent_ids))) {
122+
git__free(commit);
123+
git_error_set(GIT_ERROR_INVALID, "commit has more than 2^16 parents");
124+
return -1;
125+
}
170126

171-
while (buffer > committer_start && git__isspace(*buffer))
172-
buffer--;
127+
node->time = commit->committer->when.time;
128+
node->out_degree = (uint16_t) git_array_size(commit->parent_ids);
129+
node->parents = alloc_parents(walk, node, node->out_degree);
130+
GIT_ERROR_CHECK_ALLOC(node->parents);
173131

174-
while (buffer > committer_start && git__isdigit(*buffer))
175-
buffer--;
132+
git_array_foreach(commit->parent_ids, i, parent_oid) {
133+
node->parents[i] = git_revwalk__commit_lookup(walk, parent_oid);
176134
}
177135

178-
if ((buffer == committer_start) ||
179-
(git__strntol64(&commit_time, (char *)(buffer + 1),
180-
buffer_end - buffer + 1, NULL, 10) < 0))
181-
return commit_error(commit, "cannot parse commit time");
136+
git_commit__free(commit);
137+
138+
node->parsed = 1;
182139

183-
commit->time = commit_time;
184-
commit->parsed = 1;
185140
return 0;
186141
}
187142

@@ -200,10 +155,7 @@ int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit)
200155
git_error_set(GIT_ERROR_INVALID, "object is no commit object");
201156
error = -1;
202157
} else
203-
error = commit_quick_parse(
204-
walk, commit,
205-
(const uint8_t *)git_odb_object_data(obj),
206-
git_odb_object_size(obj));
158+
error = commit_quick_parse(walk, commit, obj);
207159

208160
git_odb_object_free(obj);
209161
return error;

src/commit_list.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ typedef struct git_commit_list_node {
3333
added:1,
3434
flags : FLAG_BITS;
3535

36-
unsigned short in_degree;
37-
unsigned short out_degree;
36+
uint16_t in_degree;
37+
uint16_t out_degree;
3838

3939
struct git_commit_list_node **parents;
4040
} git_commit_list_node;

src/integer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ GIT_INLINE(int) git__is_ssizet(size_t p)
2121
return p == (size_t)r;
2222
}
2323

24+
/** @return true if p fits into the range of a uint16_t */
25+
GIT_INLINE(int) git__is_uint16(size_t p)
26+
{
27+
uint16_t r = (uint16_t)p;
28+
return p == (size_t)r;
29+
}
30+
2431
/** @return true if p fits into the range of a uint32_t */
2532
GIT_INLINE(int) git__is_uint32(size_t p)
2633
{

0 commit comments

Comments
 (0)