Skip to content

Commit 846745a

Browse files
committed
commit-graph: Use the commit-graph in revwalks
This change makes revwalks a bit faster by using the `commit-graph` file (if present). This is possible because the `commit-graph` allows much faster parsing of commit information: it requires near-zero I/O for each commit (aside from reading a few dozen bytes off of a `mmap(2)`-ed file), instead of having to read the commit from the ODB, inflate it, and parse it. This is done by modifying `git_commit_list_parse()` and letting it use the ODB-owned commit-graph file. Part of: libgit2#5757
1 parent 02879c6 commit 846745a

File tree

4 files changed

+116
-2
lines changed

4 files changed

+116
-2
lines changed

src/commit_list.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ static int commit_quick_parse(
124124
return -1;
125125
}
126126

127+
node->generation = 0;
127128
node->time = commit->committer->when.time;
128129
node->out_degree = (uint16_t) git_array_size(commit->parent_ids);
129130
node->parents = alloc_parents(walk, node, node->out_degree);
@@ -143,11 +144,38 @@ static int commit_quick_parse(
143144
int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit)
144145
{
145146
git_odb_object *obj;
147+
git_commit_graph_file *cgraph = NULL;
146148
int error;
147149

148150
if (commit->parsed)
149151
return 0;
150152

153+
/* Let's try to use the commit graph first. */
154+
git_odb__get_commit_graph(&cgraph, walk->odb);
155+
if (cgraph) {
156+
git_commit_graph_entry e;
157+
158+
error = git_commit_graph_entry_find(&e, cgraph, &commit->oid, GIT_OID_RAWSZ);
159+
if (error == 0 && git__is_uint16(e.parent_count)) {
160+
size_t i;
161+
commit->generation = (uint32_t)e.generation;
162+
commit->time = e.commit_time;
163+
commit->out_degree = (uint16_t)e.parent_count;
164+
commit->parents = alloc_parents(walk, commit, commit->out_degree);
165+
GIT_ERROR_CHECK_ALLOC(commit->parents);
166+
167+
for (i = 0; i < commit->out_degree; ++i) {
168+
git_commit_graph_entry parent;
169+
error = git_commit_graph_entry_parent(&parent, cgraph, &e, i);
170+
if (error < 0)
171+
return error;
172+
commit->parents[i] = git_revwalk__commit_lookup(walk, &parent.sha1);
173+
}
174+
commit->parsed = 1;
175+
return 0;
176+
}
177+
}
178+
151179
if ((error = git_odb_read(&obj, walk->odb, &commit->oid)) < 0)
152180
return error;
153181

src/commit_list.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
typedef struct git_commit_list_node {
2727
git_oid oid;
2828
int64_t time;
29+
uint32_t generation;
2930
unsigned int seen:1,
3031
uninteresting:1,
3132
topo_delay:1,

src/odb.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,13 @@ int git_odb_new(git_odb **out)
465465
git__free(db);
466466
return -1;
467467
}
468+
if (git_buf_init(&db->objects_dir, 0) < 0) {
469+
git_vector_free(&db->backends);
470+
git_cache_dispose(&db->own_cache);
471+
git_mutex_free(&db->lock);
472+
git__free(db);
473+
return -1;
474+
}
468475

469476
*out = db;
470477
GIT_REFCOUNT_INC(db);
@@ -612,6 +619,17 @@ int git_odb__add_default_backends(
612619
git_mutex_unlock(&db->lock);
613620
#endif
614621

622+
if (git_mutex_lock(&db->lock) < 0) {
623+
git_error_set(GIT_ERROR_ODB, "failed to acquire the odb lock");
624+
return -1;
625+
}
626+
if (git_buf_len(&db->objects_dir) == 0 && git_buf_sets(&db->objects_dir, objects_dir) < 0) {
627+
git_mutex_unlock(&db->lock);
628+
git_odb_free(db);
629+
return -1;
630+
}
631+
git_mutex_unlock(&db->lock);
632+
615633
/* add the loose object backend */
616634
if (git_odb_backend_loose(&loose, objects_dir, -1, db->do_fsync, 0, 0) < 0 ||
617635
add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates, inode) < 0)
@@ -742,6 +760,8 @@ static void odb_free(git_odb *db)
742760
if (locked)
743761
git_mutex_unlock(&db->lock);
744762

763+
git_buf_dispose(&db->objects_dir);
764+
git_commit_graph_free(db->cgraph);
745765
git_vector_free(&db->backends);
746766
git_cache_dispose(&db->own_cache);
747767
git_mutex_free(&db->lock);
@@ -786,6 +806,53 @@ static int odb_exists_1(
786806
return (int)found;
787807
}
788808

809+
int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *db)
810+
{
811+
int error = 0;
812+
813+
if ((error = git_mutex_lock(&db->lock)) < 0) {
814+
git_error_set(GIT_ERROR_ODB, "failed to acquire the db lock");
815+
return error;
816+
}
817+
if (!db->cgraph_checked) {
818+
git_buf commit_graph_path = GIT_BUF_INIT;
819+
git_commit_graph_file *cgraph = NULL;
820+
821+
/* We only check once, no matter the result. */
822+
db->cgraph_checked = 1;
823+
824+
if (git_buf_len(&db->objects_dir) == 0) {
825+
/*
826+
* This odb was not opened with an objects directory
827+
* associated. Skip opening the commit graph.
828+
*/
829+
goto done;
830+
}
831+
832+
if ((error = git_buf_joinpath(
833+
&commit_graph_path,
834+
git_buf_cstr(&db->objects_dir),
835+
"info/commit-graph"))
836+
< 0) {
837+
git_buf_dispose(&commit_graph_path);
838+
goto done;
839+
}
840+
/* Best effort */
841+
error = git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path));
842+
git_buf_dispose(&commit_graph_path);
843+
844+
if (error < 0)
845+
goto done;
846+
847+
db->cgraph = cgraph;
848+
}
849+
850+
done:
851+
*out = db->cgraph;
852+
git_mutex_unlock(&db->lock);
853+
return 0;
854+
}
855+
789856
static int odb_freshen_1(
790857
git_odb *db,
791858
const git_oid *id,
@@ -1724,6 +1791,13 @@ int git_odb_refresh(struct git_odb *db)
17241791
}
17251792
}
17261793
}
1794+
if (db->cgraph && git_commit_graph_needs_refresh(db->cgraph, NULL)) {
1795+
/* We just free the commit graph. The next time it is requested, it will be re-loaded. */
1796+
git_commit_graph_free(db->cgraph);
1797+
db->cgraph = NULL;
1798+
}
1799+
/* Force a lazy re-check next time it is needed. */
1800+
db->cgraph_checked = 0;
17271801
git_mutex_unlock(&db->lock);
17281802

17291803
return 0;

src/odb.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
#include "git2/oid.h"
1414
#include "git2/types.h"
1515

16-
#include "vector.h"
1716
#include "cache.h"
18-
#include "posix.h"
17+
#include "commit_graph.h"
1918
#include "filter.h"
19+
#include "posix.h"
20+
#include "vector.h"
2021

2122
#define GIT_OBJECTS_DIR "objects/"
2223
#define GIT_OBJECT_DIR_MODE 0777
@@ -43,7 +44,10 @@ struct git_odb {
4344
git_mutex lock; /* protects backends */
4445
git_vector backends;
4546
git_cache own_cache;
47+
git_buf objects_dir;
48+
git_commit_graph_file *cgraph;
4649
unsigned int do_fsync :1;
50+
unsigned int cgraph_checked :1;
4751
};
4852

4953
typedef enum {
@@ -127,6 +131,13 @@ int git_odb__read_header_or_object(
127131
git_odb_object **out, size_t *len_p, git_object_t *type_p,
128132
git_odb *db, const git_oid *id);
129133

134+
/*
135+
* Attempt to get the ODB's commit graph. This object is still owned by the
136+
* ODB. If the repository does not contain a commit graph, it will return zero
137+
* and `*out` will be set to NULL.
138+
*/
139+
int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *odb);
140+
130141
/* freshen an entry in the object database */
131142
int git_odb__freshen(git_odb *db, const git_oid *id);
132143

0 commit comments

Comments
 (0)