vendor/libgit2/src/hashsig.c in rugged-0.22.2 vs vendor/libgit2/src/hashsig.c in rugged-0.23.0b1

- old
+ new

@@ -32,12 +32,12 @@
 } hashsig_heap;
 
 struct git_hashsig {
 	hashsig_heap mins;
 	hashsig_heap maxs;
+	size_t lines;
 	git_hashsig_option_t opt;
-	int considered;
 };
 
 #define HEAP_LCHILD_OF(I) (((I)<<1)+1)
 #define HEAP_RCHILD_OF(I) (((I)<<1)+2)
 #define HEAP_PARENT_OF(I) (((I)-1)>>1)
@@ -133,29 +133,27 @@
 static void hashsig_in_progress_init(
 	hashsig_in_progress *prog, git_hashsig *sig)
 {
 	int i;
 
-	switch (sig->opt) {
-	case GIT_HASHSIG_IGNORE_WHITESPACE:
+	/* no more than one can be set */
+	assert(!(sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) ||
+		!(sig->opt & GIT_HASHSIG_SMART_WHITESPACE));
+
+	if (sig->opt & GIT_HASHSIG_IGNORE_WHITESPACE) {
 		for (i = 0; i < 256; ++i)
 			prog->ignore_ch[i] = git__isspace_nonlf(i);
 		prog->use_ignores = 1;
-		break;
-	case GIT_HASHSIG_SMART_WHITESPACE:
+	} else if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE) {
 		for (i = 0; i < 256; ++i)
 			prog->ignore_ch[i] = git__isspace(i);
 		prog->use_ignores = 1;
-		break;
-	default:
+	} else {
 		memset(prog, 0, sizeof(*prog));
-		break;
 	}
 }
 
-#define HASHSIG_IN_PROGRESS_INIT { 1 }
-
 static int hashsig_add_hashes(
 	git_hashsig *sig,
 	const uint8_t *data,
 	size_t size,
 	hashsig_in_progress *prog)
@@ -172,36 +170,37 @@
 			ch = *scan;
 
 			if (use_ignores)
 				for (; scan < end && git__isspace_nonlf(ch); ch = *scan)
 					++scan;
-			else if (sig->opt != GIT_HASHSIG_NORMAL)
+			else if (sig->opt &
+				(GIT_HASHSIG_IGNORE_WHITESPACE | GIT_HASHSIG_SMART_WHITESPACE))
 				for (; scan < end && ch == '\r'; ch = *scan)
 					++scan;
 
 			/* peek at next character to decide what to do next */
-			if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE)
+			if (sig->opt & GIT_HASHSIG_SMART_WHITESPACE)
 				use_ignores = (ch == '\n');
 
 			if (scan >= end)
 				break;
 			++scan;
 
 			/* check run terminator */
-			if (ch == '\n' || ch == '\0')
+			if (ch == '\n' || ch == '\0') {
+				sig->lines++;
 				break;
+			}
 
 			++len;
 			HASHSIG_HASH_MIX(state, ch);
 		}
 
 		if (len > 0) {
 			hashsig_heap_insert(&sig->mins, (hashsig_t)state);
 			hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
 
-			sig->considered++;
-
 			while (scan < end && (*scan == '\n' || !*scan))
 				++scan;
 		}
 	}
 
@@ -210,11 +209,12 @@
 	return 0;
 }
 
 static int hashsig_finalize_hashes(git_hashsig *sig)
 {
-	if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) {
+	if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE &&
+		!(sig->opt & GIT_HASHSIG_ALLOW_SMALL_FILES)) {
 		giterr_set(GITERR_INVALID,
 			"File too small for similarity signature calculation");
 		return GIT_EBUFS;
 	}
 
@@ -334,9 +334,21 @@
 	return HASHSIG_SCALE * (matches * 2) / (a->size + b->size);
 }
 
 int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b)
 {
+	/* if we have no elements in either file then each file is either
+	 * empty or blank. if we're ignoring whitespace then the files are
+	 * similar, otherwise they're dissimilar.
+	 */
+	if (a->mins.size == 0 && b->mins.size == 0) {
+		if ((!a->lines && !b->lines) ||
+			(a->opt & GIT_HASHSIG_IGNORE_WHITESPACE))
+			return HASHSIG_SCALE;
+		else
+			return 0;
+	}
+
 	/* if we have fewer than the maximum number of elements, then just use
 	 * one array since the two arrays will be the same
 	 */
 	if (a->mins.size < HASHSIG_HEAP_SIZE)
 		return hashsig_heap_compare(&a->mins, &b->mins);