vendor/libgit2/src/pack-objects.c in rugged-0.22.2 vs vendor/libgit2/src/pack-objects.c in rugged-0.23.0b1
- old
+ new
@@ -13,10 +13,12 @@
#include "netops.h"
#include "pack.h"
#include "thread-utils.h"
#include "tree.h"
#include "util.h"
+#include "revwalk.h"
+#include "commit_list.h"
#include "git2/pack.h"
#include "git2/commit.h"
#include "git2/tag.h"
#include "git2/indexer.h"
@@ -37,10 +39,12 @@
/*
 * Pairs an indexer with a transfer-progress record.
 * NOTE(review): presumably handed around as an opaque payload while a pack
 * is being written through the indexer -- confirm at the use site.
 */
struct pack_write_context {
git_indexer *indexer;
git_transfer_progress *stats;
};
+GIT__USE_OIDMAP;
+
#ifdef GIT_THREADS
#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
int result = git_mutex_##op(&(pb)->mtx); \
assert(!result); \
@@ -122,14 +126,20 @@
pb = git__calloc(1, sizeof(*pb));
GITERR_CHECK_ALLOC(pb);
pb->object_ix = git_oidmap_alloc();
-
if (!pb->object_ix)
goto on_error;
+ pb->walk_objects = git_oidmap_alloc();
+ if (!pb->walk_objects)
+ goto on_error;
+
+ if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
+ goto on_error;
+
pb->repo = repo;
pb->nr_threads = 1; /* do not spawn any thread by default */
if (git_hash_ctx_init(&pb->ctx) < 0 ||
git_zstream_init(&pb->zstream) < 0 ||
@@ -188,10 +198,11 @@
int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
const char *name)
{
git_pobject *po;
khiter_t pos;
+ size_t newsize;
int ret;
assert(pb && oid);
/* If the object already exists in the hash table, then we don't
@@ -199,13 +210,22 @@
pos = kh_get(oid, pb->object_ix, oid);
if (pos != kh_end(pb->object_ix))
return 0;
if (pb->nr_objects >= pb->nr_alloc) {
- pb->nr_alloc = (pb->nr_alloc + 1024) * 3 / 2;
- pb->object_list = git__realloc(pb->object_list,
- pb->nr_alloc * sizeof(*po));
+ GITERR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
+ GITERR_CHECK_ALLOC_MULTIPLY(&newsize, newsize, 3 / 2);
+
+ if (!git__is_uint32(newsize)) {
+ giterr_set(GITERR_NOMEMORY, "Packfile too large to fit in memory.");
+ return -1;
+ }
+
+ pb->nr_alloc = (uint32_t)newsize;
+
+ pb->object_list = git__reallocarray(pb->object_list,
+ pb->nr_alloc, sizeof(*po));
GITERR_CHECK_ALLOC(pb->object_list);
rehash(pb);
}
po = pb->object_list + pb->nr_objects;
@@ -497,12 +517,14 @@
}
static git_pobject **compute_write_order(git_packbuilder *pb)
{
unsigned int i, wo_end, last_untagged;
+ git_pobject **wo;
- git_pobject **wo = git__malloc(sizeof(*wo) * pb->nr_objects);
+ if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
+ return NULL;
for (i = 0; i < pb->nr_objects; i++) {
git_pobject *po = pb->object_list + i;
po->tagged = 0;
po->filled = 0;
@@ -768,14 +790,17 @@
}
*mem_usage += sz;
}
if (!src->data) {
- if (git_odb_read(&obj, pb->odb, &src_object->id) < 0)
+ size_t obj_sz;
+
+ if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
+ !git__is_ulong(obj_sz = git_odb_object_size(obj)))
return -1;
- sz = (unsigned long)git_odb_object_size(obj);
+ sz = (unsigned long)obj_sz;
src->data = git__malloc(sz);
GITERR_CHECK_ALLOC(src->data);
memcpy(src->data, git_odb_object_data(obj), sz);
git_odb_object_free(obj);
@@ -815,15 +840,18 @@
git__free(trg_object->delta_data);
pb->delta_cache_size -= trg_object->delta_size;
trg_object->delta_data = NULL;
}
if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
- pb->delta_cache_size += delta_size;
+ bool overflow = git__add_uint64_overflow(
+ &pb->delta_cache_size, pb->delta_cache_size, delta_size);
+
git_packbuilder__cache_unlock(pb);
- trg_object->delta_data = git__realloc(delta_buf, delta_size);
- GITERR_CHECK_ALLOC(trg_object->delta_data);
+ if (overflow ||
+ !(trg_object->delta_data = git__realloc(delta_buf, delta_size)))
+ return -1;
} else {
/* create delta when writing the pack */
git_packbuilder__cache_unlock(pb);
git__free(delta_buf);
}
@@ -1086,11 +1114,11 @@
if (pb->nr_threads <= 1) {
find_deltas(pb, list, &list_size, window, depth);
return 0;
}
- p = git__malloc(pb->nr_threads * sizeof(*p));
+ p = git__mallocarray(pb->nr_threads, sizeof(*p));
GITERR_CHECK_ALLOC(p);
/* Partition the work among the threads */
for (i = 0; i < pb->nr_threads; ++i) {
unsigned sub_size = list_size / (pb->nr_threads - i);
@@ -1237,11 +1265,11 @@
* at least report that we are in the deltafication stage
*/
if (pb->progress_cb)
pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
- delta_list = git__malloc(pb->nr_objects * sizeof(*delta_list));
+ delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
GITERR_CHECK_ALLOC(delta_list);
for (i = 0; i < pb->nr_objects; ++i) {
git_pobject *po = pb->object_list + i;
@@ -1325,10 +1353,11 @@
/*
 * Return a pointer to the `pack_oid` field stored inside the packbuilder.
 * The pointer refers to memory owned by `pb`; nothing is copied.
 */
const git_oid *git_packbuilder_hash(git_packbuilder *pb)
{
return &pb->pack_oid;
}
+
static int cb_tree_walk(
const char *root, const git_tree_entry *entry, void *payload)
{
int error;
struct tree_walk_context *ctx = payload;
@@ -1383,10 +1412,239 @@
/* Return the current value of the packbuilder's `nr_written` counter. */
uint32_t git_packbuilder_written(git_packbuilder *pb)
{
return pb->nr_written;
}
+/*
+ * Create a new walk object for `id`.
+ *
+ * The object is obtained from the packbuilder's `object_pool` via
+ * git_pool_mallocz() and its `id` field is filled in from `id`. This
+ * function does not consult or update `pb->walk_objects`.
+ *
+ * Returns 0 and stores the object in `*out`; returns -1 after setting
+ * an out-of-memory error when the pool allocation fails.
+ */
+int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *fresh = git_pool_mallocz(&pb->object_pool, 1);
+
+	if (fresh == NULL) {
+		giterr_set_oom();
+		return -1;
+	}
+
+	git_oid_cpy(&fresh->id, id);
+	*out = fresh;
+
+	return 0;
+}
+
+/*
+ * Fetch the walk object tracked for `id` in `pb->walk_objects`, creating
+ * and registering a new one when the id is not in the map yet.
+ *
+ * Returns 0 with `*out` set, or a negative value when the object could
+ * not be allocated or could not be stored in the map.
+ */
+static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+{
+	int error;
+	khiter_t pos;
+	git_walk_object *obj;
+
+	pos = git_oidmap_lookup_index(pb->walk_objects, id);
+	if (git_oidmap_valid_index(pb->walk_objects, pos)) {
+		obj = git_oidmap_value_at(pb->walk_objects, pos);
+	} else {
+		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
+			return error;
+
+		/*
+		 * The insert reports its status through `error`. A negative
+		 * status means the entry was not stored; handing `obj` back
+		 * anyway would lose any flags set on it, since the next
+		 * lookup for this id would create a fresh object.
+		 */
+		git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);
+		if (error < 0) {
+			giterr_set_oom();
+			return -1;
+		}
+	}
+
+	*out = obj;
+	return 0;
+}
+
+/*
+ * Set the `uninteresting` flag on the walk object for blob `id`,
+ * creating the walk object first if it is not tracked yet.
+ *
+ * Returns 0 on success or the negative error from retrieve_object().
+ */
+static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *blob;
+	int error = retrieve_object(&blob, pb, id);
+
+	if (error < 0)
+		return error;
+
+	blob->uninteresting = 1;
+	return 0;
+}
+
+/*
+ * Flag the tree `id` as uninteresting and recurse into its entries,
+ * flagging every sub-tree and blob found along the way. Entries of any
+ * other type are left alone.
+ *
+ * A tree whose walk object is already flagged is not descended into
+ * again. Returns a negative value as soon as any lookup or nested
+ * marking fails.
+ */
+static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *node;
+	git_tree *tree;
+	size_t idx;
+	int error;
+
+	error = retrieve_object(&node, pb, id);
+	if (error < 0)
+		return error;
+
+	/* flagged on an earlier visit; do not walk it a second time */
+	if (node->uninteresting)
+		return 0;
+
+	node->uninteresting = 1;
+
+	error = git_tree_lookup(&tree, pb->repo, id);
+	if (error < 0)
+		return error;
+
+	/* the `error >= 0` guard ends the loop on the first failing entry */
+	for (idx = 0; error >= 0 && idx < git_tree_entrycount(tree); idx++) {
+		const git_tree_entry *entry = git_tree_entry_byindex(tree, idx);
+		const git_oid *child_id = git_tree_entry_id(entry);
+
+		switch (git_tree_entry_type(entry)) {
+		case GIT_OBJ_TREE:
+			error = mark_tree_uninteresting(pb, child_id);
+			break;
+		case GIT_OBJ_BLOB:
+			error = mark_blob_uninteresting(pb, child_id);
+			break;
+		default:
+			/* it's a submodule or something unknown, we don't want it */
+			break;
+		}
+	}
+
+	git_tree_free(tree);
+	return error;
+}
+
+/*
+ * Walk the given commit list and, for every commit whose list item is
+ * flagged uninteresting, mark that commit's root tree (and through it
+ * the nested trees and blobs) as uninteresting.
+ *
+ * The commits come from a git_revwalk and already carry their flag, so
+ * only trees and blobs need marking here.
+ *
+ * Returns 0 on success or the first negative error encountered.
+ */
+static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
+{
+	git_commit_list *node;
+	git_commit *commit;
+	int error;
+
+	for (node = commits; node != NULL; node = node->next) {
+		if (node->item->uninteresting) {
+			error = git_commit_lookup(&commit, pb->repo, &node->item->oid);
+			if (error < 0)
+				return error;
+
+			error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
+			git_commit_free(commit);
+
+			if (error < 0)
+				return error;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Insert `tree` and everything reachable from it into the packbuilder.
+ *
+ * The tree itself is inserted first, then each entry is visited:
+ * sub-trees are looked up and inserted recursively, blobs are inserted
+ * under their entry name, and any other entry type is ignored.
+ *
+ * Trees already marked `seen` are skipped so shared sub-trees are only
+ * walked once. Trees and blobs flagged `uninteresting` are skipped as
+ * well, so objects marked at the edges of the walk are not packed.
+ *
+ * Returns 0 on success or a negative value on the first failure.
+ */
+int insert_tree(git_packbuilder *pb, git_tree *tree)
+{
+	size_t i;
+	int error;
+	git_tree *subtree;
+	git_walk_object *obj;
+	git_walk_object *blob_obj;
+	const char *name;
+
+	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
+		return error;
+
+	if (obj->seen || obj->uninteresting)
+		return 0;
+
+	obj->seen = 1;
+
+	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
+		return error;
+
+	for (i = 0; i < git_tree_entrycount(tree); i++) {
+		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
+		const git_oid *entry_id = git_tree_entry_id(entry);
+
+		switch (git_tree_entry_type(entry)) {
+		case GIT_OBJ_TREE:
+			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
+				return error;
+
+			error = insert_tree(pb, subtree);
+			git_tree_free(subtree);
+
+			if (error < 0)
+				return error;
+
+			break;
+		case GIT_OBJ_BLOB:
+			if ((error = retrieve_object(&blob_obj, pb, entry_id)) < 0)
+				return error;
+
+			/* blobs marked at the edges of the walk are not packed */
+			if (blob_obj->uninteresting)
+				break;
+
+			name = git_tree_entry_name(entry);
+			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
+				return error;
+			break;
+		default:
+			/* it's a submodule or something unknown, we don't want it */
+			break;
+		}
+	}
+
+	return error;
+}
+
+/*
+ * Insert the commit described by walk object `obj` into the packbuilder,
+ * followed by its root tree and everything insert_tree() adds for it.
+ *
+ * The walk object is flagged `seen` before any work is attempted.
+ * Returns 0 on success or a negative value on the first failure; the
+ * looked-up commit and tree are released on every path past the lookup.
+ */
+int insert_commit(git_packbuilder *pb, git_walk_object *obj)
+{
+	git_commit *commit = NULL;
+	git_tree *root = NULL;
+	int error;
+
+	obj->seen = 1;
+
+	error = git_packbuilder_insert(pb, &obj->id, NULL);
+	if (error < 0)
+		return error;
+
+	error = git_commit_lookup(&commit, pb->repo, &obj->id);
+	if (error < 0)
+		return error;
+
+	error = git_tree_lookup(&root, pb->repo, git_commit_tree_id(commit));
+	if (error >= 0)
+		error = insert_tree(pb, root);
+
+	git_commit_free(commit);
+	git_tree_free(root);
+
+	return error;
+}
+
+/*
+ * Insert into the packbuilder every object produced by `walk`.
+ *
+ * The trees and blobs of the walk's uninteresting input commits are
+ * marked first. The walk is then drained: each commit that is neither
+ * already seen nor uninteresting is inserted together with its tree.
+ *
+ * Returns 0 once the walk is exhausted, or the negative error reported
+ * by the marking step, the revwalk, or an insertion.
+ */
+int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
+{
+	int error;
+	git_oid id;
+	git_walk_object *obj;
+
+	assert(pb && walk);
+
+	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
+		return error;
+
+	/*
+	 * TODO: git marks the parents of the edges
+	 * uninteresting. This may provide a speed advantage, but does
+	 * seem to assume the remote does not have a single-commit
+	 * history on the other end.
+	 */
+
+	/* walk down each tree up to the blobs and insert them, stopping when uninteresting */
+	while ((error = git_revwalk_next(&id, walk)) == 0) {
+		if ((error = retrieve_object(&obj, pb, &id)) < 0)
+			return error;
+
+		if (obj->seen || obj->uninteresting)
+			continue;
+
+		if ((error = insert_commit(pb, obj)) < 0)
+			return error;
+	}
+
+	/* running out of commits is the normal way for the walk to end */
+	if (error == GIT_ITEROVER)
+		error = 0;
+
+	/* any other revwalk failure must reach the caller, not be swallowed */
+	return error;
+}
+
int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
{
if (!pb)
return -1;
@@ -1415,9 +1673,12 @@
if (pb->object_ix)
git_oidmap_free(pb->object_ix);
if (pb->object_list)
git__free(pb->object_list);
+
+ git_oidmap_free(pb->walk_objects);
+ git_pool_clear(&pb->object_pool);
git_hash_ctx_cleanup(&pb->ctx);
git_zstream_free(&pb->zstream);
git__free(pb);