ext/zstdruby/libzstd/compress/zstd_opt.c in zstd-ruby-1.3.7.0 vs ext/zstdruby/libzstd/compress/zstd_opt.c in zstd-ruby-1.3.8.0

- old
+ new

@@ -15,11 +15,13 @@ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + /*-************************************* * Price functions for optimal parser ***************************************/ #if 0 /* approximation at bit level */ @@ -50,48 +52,67 @@ U32 const weight = BWeight + FWeight; assert(hb + BITCOST_ACCURACY < 31); return weight; } -/* debugging function, @return price in bytes */ +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ MEM_STATIC double ZSTD_fCost(U32 price) { return (double)price / (BITCOST_MULTIPLIER*8); } +#endif static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) { optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); } -static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus) +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) { U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); - for (s=0; s<=lastEltIndex; s++) { + for (s=0; s<lastEltIndex+1; s++) { table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); sum += table[s]; } return sum; } -static void ZSTD_rescaleFreqs(optState_t* const optPtr, - const BYTE* const src, size_t const srcSize, - int optLevel) +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) { + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); optPtr->priceType = zop_dynamic; if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= 1024) /* heuristic */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); optPtr->priceType = zop_predef; + } assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ optPtr->priceType = zop_dynamic; assert(optPtr->litFreq != NULL); optPtr->litSum = 0; { unsigned lit; @@ -206,11 +227,13 @@ { if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); - return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel)); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); } } /* ZSTD_litLengthContribution() : * @return ( cost(litlength) - cost(0) ) @@ -251,11 +274,11 @@ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, - const optState_t* const optPtr, + const optState_t* const optPtr, int const optLevel) { U32 price; U32 const offCode = ZSTD_highbit32(offset+1); U32 const mlBase = matchLength - MINMATCH; @@ -383,11 +406,10 @@ const U32 btLow = btMask >= current ? 0 : current - btMask; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ U32 const windowLow = ms->window.lowLimit; - U32 const matchLow = windowLow ? windowLow : 1; U32 matchEndIdx = current+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; #ifdef ZSTD_C_PREDICT U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); @@ -399,11 +421,12 @@ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ - while (nbCompares-- && (matchIndex >= matchLow)) { + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ assert(matchIndex < current); #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ @@ -477,28 +500,31 @@ const U32 mls, const ZSTD_dictMode_e dictMode) { const BYTE* const base = ms->window.base; U32 const target = (U32)(ip - base); U32 idx = ms->nextToUpdate; - DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", idx, target, dictMode); while(idx < target) idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); ms->nextToUpdate = target; } void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict); + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); } FORCE_INLINE_TEMPLATE U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], U32 const ll0, - ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) + U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + ZSTD_match_t* matches, + const U32 lengthToBeat, + U32 const mls /* template */) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); const BYTE* const base = ms->window.base; U32 const current = (U32)(ip-base); @@ -540,10 +566,11 @@ size_t bestLength = lengthToBeat-1; DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ { U32 const lastR = ZSTD_REP_NUM + ll0; U32 repCode; for (repCode = ll0; repCode < lastR; repCode++) { U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; U32 const repIndex = current - repOffset; @@ -722,11 +749,11 @@ const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, U32 rep[ZSTD_REP_NUM], U32 const ll0, ZSTD_match_t* matches, U32 const lengthToBeat) { const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->searchLength; + U32 const matchLengthSearch = cParams->minMatch; DEBUGLOG(8, "ZSTD_BtGetAllMatches"); if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); switch(matchLengthSearch) { @@ -772,16 +799,34 @@ static U32 ZSTD_totalLen(ZSTD_optimal_t sol) { return sol.litlen + sol.mlen; } +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, const ZSTD_dictMode_e dictMode) + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) { optState_t* const optStatePtr = &ms->opt; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; @@ -790,18 +835,19 @@ const BYTE* const base = ms->window.base; const BYTE* const prefixStart = base + ms->window.dictLimit; const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4; + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_optimal_t lastSequence; /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); assert(optLevel <= 2); ms->nextToUpdate3 = ms->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); ip += (ip==prefixStart); @@ -997,11 +1043,11 @@ U32 const llen = opt[storePos].litlen; U32 const mlen = opt[storePos].mlen; U32 const offCode = opt[storePos].off; U32 const advance = llen + mlen; DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, llen, mlen); + anchor - istart, (unsigned)llen, (unsigned)mlen); if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ assert(storePos == storeEnd); /* must be last sequence */ ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ continue; /* will finish */ @@ -1045,15 +1091,15 @@ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); } /* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus) +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) { U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus > 0); - for (s=0; s<=lastEltIndex; s++) { + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; s<lastEltIndex+1; s++) { table[s] <<= ZSTD_FREQ_DIV+bonus; table[s]--; sum += table[s]; } return sum; @@ -1061,47 +1107,82 @@ /* used in 2-pass strategy */ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) { optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); } +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its constract must be respected. + */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + ms->nextToUpdate3 = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + size_t ZSTD_compressBlock_btultra( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); -#if 0 - /* 2-pass strategy (disabled) + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const current = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: * this strategy makes a first pass over first block to collect statistics * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. * The compression ratio gain is generally small (~0.5% on first block), * the cost is 2x cpu time on first block. */ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */ - U32 tmpRep[ZSTD_REP_NUM]; - DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics"); - assert(ms->nextToUpdate >= ms->window.dictLimit - && ms->nextToUpdate <= ms->window.dictLimit + 1); - memcpy(tmpRep, rep, sizeof(tmpRep)); - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ - ZSTD_resetSeqStore(seqStore); - /* invalidate first scan from history */ - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; - ms->window.lowLimit = ms->window.dictLimit; - ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } -#endif + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); } size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -1128,5 +1209,9 @@ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); } + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */