vendor/libgit2/src/pack-objects.c in rugged-0.22.2 vs vendor/libgit2/src/pack-objects.c in rugged-0.23.0b1

- old
+ new

@@ -13,10 +13,12 @@ #include "netops.h" #include "pack.h" #include "thread-utils.h" #include "tree.h" #include "util.h" +#include "revwalk.h" +#include "commit_list.h" #include "git2/pack.h" #include "git2/commit.h" #include "git2/tag.h" #include "git2/indexer.h" @@ -37,10 +39,12 @@ struct pack_write_context { git_indexer *indexer; git_transfer_progress *stats; }; +GIT__USE_OIDMAP; + #ifdef GIT_THREADS #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \ int result = git_mutex_##op(&(pb)->mtx); \ assert(!result); \ @@ -122,14 +126,20 @@ pb = git__calloc(1, sizeof(*pb)); GITERR_CHECK_ALLOC(pb); pb->object_ix = git_oidmap_alloc(); - if (!pb->object_ix) goto on_error; + pb->walk_objects = git_oidmap_alloc(); + if (!pb->walk_objects) + goto on_error; + + if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0) + goto on_error; + pb->repo = repo; pb->nr_threads = 1; /* do not spawn any thread by default */ if (git_hash_ctx_init(&pb->ctx) < 0 || git_zstream_init(&pb->zstream) < 0 || @@ -188,10 +198,11 @@ int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid, const char *name) { git_pobject *po; khiter_t pos; + size_t newsize; int ret; assert(pb && oid); /* If the object already exists in the hash table, then we don't @@ -199,13 +210,22 @@ pos = kh_get(oid, pb->object_ix, oid); if (pos != kh_end(pb->object_ix)) return 0; if (pb->nr_objects >= pb->nr_alloc) { - pb->nr_alloc = (pb->nr_alloc + 1024) * 3 / 2; - pb->object_list = git__realloc(pb->object_list, - pb->nr_alloc * sizeof(*po)); + GITERR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024); + GITERR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3); /* keep the x1.5 growth of the removed code: a literal 3 / 2 is integer division and evaluates to 1 */ + + if (!git__is_uint32(newsize)) { + giterr_set(GITERR_NOMEMORY, "Packfile too large to fit in memory."); + return -1; + } + + pb->nr_alloc = (uint32_t)newsize; + + pb->object_list = git__reallocarray(pb->object_list, + pb->nr_alloc, sizeof(*po)); GITERR_CHECK_ALLOC(pb->object_list); rehash(pb); } po = pb->object_list + pb->nr_objects; @@ -497,12 +517,14 
@@ } static git_pobject **compute_write_order(git_packbuilder *pb) { unsigned int i, wo_end, last_untagged; + git_pobject **wo; - git_pobject **wo = git__malloc(sizeof(*wo) * pb->nr_objects); + if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL) + return NULL; for (i = 0; i < pb->nr_objects; i++) { git_pobject *po = pb->object_list + i; po->tagged = 0; po->filled = 0; @@ -768,14 +790,17 @@ } *mem_usage += sz; } if (!src->data) { - if (git_odb_read(&obj, pb->odb, &src_object->id) < 0) + size_t obj_sz; + + if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 || + !git__is_ulong(obj_sz = git_odb_object_size(obj))) return -1; - sz = (unsigned long)git_odb_object_size(obj); + sz = (unsigned long)obj_sz; src->data = git__malloc(sz); GITERR_CHECK_ALLOC(src->data); memcpy(src->data, git_odb_object_data(obj), sz); git_odb_object_free(obj); @@ -815,15 +840,18 @@ git__free(trg_object->delta_data); pb->delta_cache_size -= trg_object->delta_size; trg_object->delta_data = NULL; } if (delta_cacheable(pb, src_size, trg_size, delta_size)) { - pb->delta_cache_size += delta_size; + bool overflow = git__add_uint64_overflow( + &pb->delta_cache_size, pb->delta_cache_size, delta_size); + git_packbuilder__cache_unlock(pb); - trg_object->delta_data = git__realloc(delta_buf, delta_size); - GITERR_CHECK_ALLOC(trg_object->delta_data); + if (overflow || + !(trg_object->delta_data = git__realloc(delta_buf, delta_size))) + return -1; } else { /* create delta when writing the pack */ git_packbuilder__cache_unlock(pb); git__free(delta_buf); } @@ -1086,11 +1114,11 @@ if (pb->nr_threads <= 1) { find_deltas(pb, list, &list_size, window, depth); return 0; } - p = git__malloc(pb->nr_threads * sizeof(*p)); + p = git__mallocarray(pb->nr_threads, sizeof(*p)); GITERR_CHECK_ALLOC(p); /* Partition the work among the threads */ for (i = 0; i < pb->nr_threads; ++i) { unsigned sub_size = list_size / (pb->nr_threads - i); @@ -1237,11 +1265,11 @@ * at least report that we are in the deltafication 
stage */ if (pb->progress_cb) pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload); - delta_list = git__malloc(pb->nr_objects * sizeof(*delta_list)); + delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list)); GITERR_CHECK_ALLOC(delta_list); for (i = 0; i < pb->nr_objects; ++i) { git_pobject *po = pb->object_list + i; @@ -1325,10 +1353,11 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb) { return &pb->pack_oid; } + static int cb_tree_walk( const char *root, const git_tree_entry *entry, void *payload) { int error; struct tree_walk_context *ctx = payload; @@ -1383,10 +1412,239 @@ uint32_t git_packbuilder_written(git_packbuilder *pb) { return pb->nr_written; } +int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +{ /* Take a new walk object from pb->object_pool, copy id into it and hand it back through *out; returns -1 after giterr_set_oom() when the pool yields NULL. */ + git_walk_object *obj; + + obj = git_pool_mallocz(&pb->object_pool, 1); + if (!obj) { + giterr_set_oom(); + return -1; + } + + git_oid_cpy(&obj->id, id); + + *out = obj; + return 0; +} + +static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +{ /* Return through *out the walk object stored for id in pb->walk_objects; on a miss, create one with lookup_walk_object() and register it under its own id. */ + int error; + khiter_t pos; + git_walk_object *obj; + + pos = git_oidmap_lookup_index(pb->walk_objects, id); + if (git_oidmap_valid_index(pb->walk_objects, pos)) { + obj = git_oidmap_value_at(pb->walk_objects, pos); + } else { + if ((error = lookup_walk_object(&obj, pb, id)) < 0) + return error; + + git_oidmap_insert(pb->walk_objects, &obj->id, obj, error); /* NOTE(review): 'error' is handed to git_oidmap_insert but never inspected afterwards - confirm whether it can report a failed insert */ + } + + *out = obj; + return 0; +} + +static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id) +{ /* Flag the walk object for blob id as uninteresting. */ + int error; + git_walk_object *obj; + + if ((error = retrieve_object(&obj, pb, id)) < 0) + return error; + + obj->uninteresting = 1; + + return 0; +} + +static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id) +{ /* Flag tree id as uninteresting, then do the same for every subtree and blob entry in it; a tree that is already flagged is not descended into again. */ + git_walk_object *obj; + git_tree *tree; + int error; + size_t i; + + if ((error = retrieve_object(&obj, pb, id)) < 0) + return error; + + if (obj->uninteresting) + return 0; + + 
obj->uninteresting = 1; + + if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0) + return error; + + for (i = 0; i < git_tree_entrycount(tree); i++) { + const git_tree_entry *entry = git_tree_entry_byindex(tree, i); + const git_oid *entry_id = git_tree_entry_id(entry); + switch (git_tree_entry_type(entry)) { + case GIT_OBJ_TREE: + if ((error = mark_tree_uninteresting(pb, entry_id)) < 0) + goto cleanup; + break; + case GIT_OBJ_BLOB: + if ((error = mark_blob_uninteresting(pb, entry_id)) < 0) + goto cleanup; + break; + default: + /* it's a submodule or something unknown, we don't want it */ + ; + } + } + +cleanup: + git_tree_free(tree); + return error; +} + +/* + * Mark the edges of the graph uninteresting. Since we start from a + * git_revwalk, the commits are already uninteresting, but we need to + * mark the trees and blobs. + */ +static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits) +{ + int error; + git_commit_list *list; + git_commit *commit; + + for (list = commits; list; list = list->next) { + if (!list->item->uninteresting) + continue; + + if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0) + return error; + + error = mark_tree_uninteresting(pb, git_commit_tree_id(commit)); + git_commit_free(commit); + + if (error < 0) + return error; + } + + return 0; +} + +int insert_tree(git_packbuilder *pb, git_tree *tree) +{ /* Insert tree, and recursively its subtrees and blobs, into the packbuilder; a tree already seen or flagged uninteresting is skipped together with everything below it. */ + size_t i; + int error; + git_tree *subtree; + git_walk_object *obj; + const char *name; + + if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0) + return error; + + if (obj->seen || obj->uninteresting) /* honour the flag set by mark_tree_uninteresting(); it was previously written but never read */ + return 0; + + obj->seen = 1; + + if ((error = git_packbuilder_insert(pb, &obj->id, NULL))) + return error; + + for (i = 0; i < git_tree_entrycount(tree); i++) { + const git_tree_entry *entry = git_tree_entry_byindex(tree, i); + const git_oid *entry_id = git_tree_entry_id(entry); + switch (git_tree_entry_type(entry)) { + case GIT_OBJ_TREE: + if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0) + 
return error; + + error = insert_tree(pb, subtree); + git_tree_free(subtree); + + if (error < 0) + return error; + + break; + case GIT_OBJ_BLOB: /* NOTE(review): blob entries are inserted without consulting their own uninteresting flag */ + name = git_tree_entry_name(entry); + if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0) + return error; + break; + default: + /* it's a submodule or something unknown, we don't want it */ + ; + } + } + + + return error; +} + +int insert_commit(git_packbuilder *pb, git_walk_object *obj) +{ /* Mark obj seen, then insert the commit it names and that commit's root tree (via insert_tree) into the packbuilder. */ + int error; + git_commit *commit = NULL; + git_tree *tree = NULL; + + obj->seen = 1; + + if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0) + return error; + + if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0) + return error; + + if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0) + goto cleanup; + + if ((error = insert_tree(pb, tree)) < 0) + goto cleanup; + +cleanup: + git_commit_free(commit); + git_tree_free(tree); + return error; +} + +int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk) +{ /* Flag the trees and blobs of the walk's uninteresting user_input commits, then insert every commit the walk yields unless it is already seen or uninteresting. Returns 0 once the walk is exhausted, otherwise the first error encountered. */ + int error; + git_oid id; + git_walk_object *obj; + + assert(pb && walk); + + if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0) + return error; + + /* + * TODO: git marks the parents of the edges + * uninteresting. This may provide a speed advantage, but does + * seem to assume the remote does not have a single-commit + * history on the other end. 
+ */ + + /* walk down each tree up to the blobs and insert them, stopping when uninteresting */ + while ((error = git_revwalk_next(&id, walk)) == 0) { + if ((error = retrieve_object(&obj, pb, &id)) < 0) + return error; + + if (obj->seen || obj->uninteresting) + continue; + + if ((error = insert_commit(pb, obj)) < 0) + return error; + } + + if (error == GIT_ITEROVER) + error = 0; + + return error; /* GIT_ITEROVER was mapped to 0 above; any other git_revwalk_next failure must reach the caller instead of being reported as success */ +} + int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload) { if (!pb) return -1; @@ -1415,9 +1673,12 @@ if (pb->object_ix) git_oidmap_free(pb->object_ix); if (pb->object_list) git__free(pb->object_list); + + git_oidmap_free(pb->walk_objects); + git_pool_clear(&pb->object_pool); git_hash_ctx_cleanup(&pb->ctx); git_zstream_free(&pb->zstream); git__free(pb);