contrib/lz4/lib/lz4opt.h in extlz4-0.2.4.3 vs contrib/lz4/lib/lz4opt.h in extlz4-0.2.5

- old
+ new

@@ -33,334 +33,324 @@ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #define LZ4_OPT_NUM (1<<12) - typedef struct { - int off; - int len; -} LZ4HC_match_t; - -typedef struct { int price; int off; int mlen; int litlen; } LZ4HC_optimal_t; /* price in bytes */ -FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen) +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) { - size_t price = litlen; - if (litlen >= (size_t)RUN_MASK) + int price = litlen; + if (litlen >= (int)RUN_MASK) price += 1 + (litlen-RUN_MASK)/255; return price; } /* requires mlen >= MINMATCH */ -FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen) +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) { - size_t price = 2 + 1; /* 16-bit offset + token */ + int price = 1 + 2 ; /* token + 16-bit offset */ price += LZ4HC_literalsPrice(litlen); - if (mlen >= (size_t)(ML_MASK+MINMATCH)) - price+= 1 + (mlen-(ML_MASK+MINMATCH))/255; + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + (mlen-(ML_MASK+MINMATCH))/255; return price; } /*-************************************* -* Binary Tree search +* Match finder ***************************************/ -FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches ( - LZ4HC_CCtx_internal* ctx, - const BYTE* const ip, - const BYTE* const iHighLimit, - size_t best_mlen, - LZ4HC_match_t* matches, - int* matchNum) -{ - U16* const chainTable = ctx->chainTable; - U32* const HashTable = ctx->hashTable; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 current = (U32)(ip - base); - const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1); - const BYTE* const dictBase = ctx->dictBase; - const BYTE* match; - int nbAttempts = ctx->searchNum; - int mnum = 0; - U16 *ptr0, *ptr1, delta0, delta1; - U32 matchIndex; - size_t matchLength = 0; - U32* HashPos; +typedef struct { + int off; + int len; +} LZ4HC_match_t; - if (ip + MINMATCH > iHighLimit) return 1; - - /* HC4 match finder */ - HashPos = &HashTable[LZ4HC_hashPtr(ip)]; - matchIndex = *HashPos; - *HashPos = current; - - ptr0 = &DELTANEXTMAXD(current*2+1); - ptr1 = &DELTANEXTMAXD(current*2); - delta0 = delta1 = (U16)(current - matchIndex); - - while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - match = base + matchIndex; - matchLength = LZ4_count(ip, match, iHighLimit); - } else { - const BYTE* vLimit = ip + (dictLimit - matchIndex); - match = dictBase + matchIndex; - if (vLimit > iHighLimit) vLimit = iHighLimit; - matchLength = LZ4_count(ip, match, vLimit); - if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) - matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > best_mlen) { - best_mlen = matchLength; - if (matches) { - if (matchIndex >= dictLimit) - matches[mnum].off = (int)(ip - match); - else - matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */ - matches[mnum].len = (int)matchLength; - mnum++; - } - if (best_mlen > LZ4_OPT_NUM) break; - } - - if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - DEBUGLOG(6, "ip :%016llX", (U64)ip); - DEBUGLOG(6, "match:%016llX", (U64)match); - if (*(ip+matchLength) < *(match+matchLength)) { - *ptr0 = delta0; - ptr0 = &DELTANEXTMAXD(matchIndex*2); - if (*ptr0 == (U16)-1) break; - delta0 = *ptr0; - delta1 += delta0; - matchIndex -= delta0; - } else { - *ptr1 = delta1; - ptr1 = &DELTANEXTMAXD(matchIndex*2+1); - if (*ptr1 == (U16)-1) break; - delta1 = *ptr1; - delta0 += delta1; - matchIndex -= delta1; - } - } - - *ptr0 = (U16)-1; - *ptr1 = (U16)-1; - if (matchNum) *matchNum = mnum; - /* if (best_mlen > 8) return best_mlen-8; */ - if (!matchNum) return 1; - return 1; -} - - -FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit) +LZ4_FORCE_INLINE +LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches) { - const BYTE* const base = ctx->base; - const U32 target = (U32)(ip - base); - U32 idx = ctx->nextToUpdate; - while(idx < target) - idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL); + LZ4HC_match_t match = { 0 , 0 }; + const BYTE* matchPtr = NULL; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, + ip, ip, iHighLimit, minLen, &matchPtr, &ip, + nbSearches, 1 /* patternAnalysis */); + if (matchLength <= minLen) return match; + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; } -/** Tree updater, providing best match */ -FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( - LZ4HC_CCtx_internal* ctx, - const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate) -{ - int mnum = 0; - if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */ - if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit); - best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum); - ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen); - return mnum; -} - - -#define SET_PRICE(pos, ml, offset, ll, cost) \ -{ \ - while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ - opt[pos].mlen = (int)ml; \ - opt[pos].off = (int)offset; \ - opt[pos].litlen = (int)ll; \ - opt[pos].price = (int)cost; \ -} - - static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const char* const source, - char* dest, - int inputSize, - int maxOutputSize, - limitedOutput_directive limit, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, size_t sufficient_len, - const int fullUpdate + limitedOutput_directive limit, + int const fullUpdate ) { - LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */ - LZ4HC_match_t matches[LZ4_OPT_NUM + 1]; +#define TRAILING_LITERALS 3 + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... */ const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; + const BYTE* const iend = ip + *srcSizePtr; const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); + *srcSizePtr = 0; + if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; - ctx->end += inputSize; - ip++; /* Main Loop */ + assert(ip - anchor < LZ4_MAX_INPUT_SIZE); while (ip < mflimit) { - size_t const llen = ip - anchor; - size_t last_pos = 0; - size_t match_num, cur, best_mlen, best_off; - memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */ + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; - match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); - if (!match_num) { ip++; continue; } + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches); + if (firstMatch.len==0) { ip++; continue; } - if ((size_t)matches[match_num-1].len > sufficient_len) { + if ((size_t)firstMatch.len > sufficient_len) { /* good enough solution : immediate encoding */ - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto encode; + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; + opSaved = op; + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ + goto _dest_overflow; + continue; } - /* set prices using matches at position = 0 */ - { size_t matchNb; - for (matchNb = 0; matchNb < match_num; matchNb++) { - size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; - best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - for ( ; mlen <= best_mlen ; mlen++) { - size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen); - SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */ - } } } + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } - if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */ - /* check further positions */ - opt[0].mlen = opt[1].mlen = 1; - for (cur = 1; cur <= last_pos; cur++) { + for (cur = 1; cur < last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; - /* establish baseline price if cur is literal */ - { size_t price, litlen; - if (opt[cur-1].mlen == 1) { - /* no match at previous position */ - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen); - } else { - price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen); - } - } else { - litlen = 1; - price = opt[cur - 1].price + LZ4HC_literalsPrice(1); - } - - if (price < (size_t)opt[cur].price) - SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */ + if (curPtr >= mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; } - if (cur == last_pos || curPtr >= mflimit) break; + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); + if (!newMatch.len) continue; - match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) { + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { /* immediate encoding */ - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; goto encode; } - /* set prices using matches at position = cur */ - { size_t matchNb; - for (matchNb = 0; matchNb < match_num; matchNb++) { - size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; - best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? - (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur; + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } - for ( ; ml <= best_mlen ; ml++) { - size_t ll, price; - if (opt[cur].mlen == 1) { - ll = opt[cur].litlen; - if (cur > ll) - price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml); - else - price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen); - } else { - ll = 0; - price = opt[cur].price + LZ4HC_sequencePrice(0, ml); - } + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; - if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) { - SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price); - } } } } - } /* for (cur = 1; cur <= last_pos; cur++) */ + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; + if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ -encode: /* cur, last_pos, best_mlen, best_off must be set */ - opt[0].mlen = 1; - while (1) { /* from end to beginning */ - size_t const ml = opt[cur].mlen; - int const offset = opt[cur].off; - opt[cur].mlen = (int)best_mlen; - opt[cur].off = (int)best_off; - best_mlen = ml; - best_off = offset; - if (ml > cur) break; /* can this happen ? */ - cur -= ml; - } + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; - /* encode all recorded sequences */ - cur = 0; - while (cur < last_pos) { - int const ml = opt[cur].mlen; - int const offset = opt[cur].off; - if (ml == 1) { ip++; cur++; continue; } - cur += ml; - if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0; - } +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path") + DEBUGLOG(6, "last_match_pos = %i", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } } + + /* encode all recorded sequences in order */ + { int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <= MAX_DISTANCE)); + opSaved = op; + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ + goto _dest_overflow; + } } } /* while (ip < mflimit) */ +_last_literals: /* Encode Last Literals */ - { int lastRun = (int)(iend - anchor); - if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun<<ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend-anchor; + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + litLength + lastRunSize; + if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; /* Check output limit */ + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1; + litLength = (lastRunSize + 255 - RUN_MASK) / 255; + lastRunSize -= litLength; + } + ip = anchor + lastRunSize; + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; } /* End */ - return (int) ((char*)op-dest); + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) ((char*)op-dst); + +_dest_overflow: + if (limit == limitedDestSize) { + op = opSaved; /* restore correct out pointer */ + goto _last_literals; + } + return 0; }