ext/zstdruby/libzstd/compress/zstd_opt.c in zstd-ruby-1.3.7.0 vs ext/zstdruby/libzstd/compress/zstd_opt.c in zstd-ruby-1.3.8.0
- old
+ new
@@ -15,11 +15,13 @@
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE (1<<30)
+#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
/*-*************************************
* Price functions for optimal parser
***************************************/
#if 0 /* approximation at bit level */
@@ -50,48 +52,67 @@
U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31);
return weight;
}
-/* debugging function, @return price in bytes */
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
{
return (double)price / (BITCOST_MULTIPLIER*8);
}
+#endif
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}
-static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
{
U32 s, sum=0;
+ DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
- for (s=0; s<=lastEltIndex; s++) {
+ for (s=0; s<lastEltIndex+1; s++) {
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
sum += table[s];
}
return sum;
}
-static void ZSTD_rescaleFreqs(optState_t* const optPtr,
- const BYTE* const src, size_t const srcSize,
- int optLevel)
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ * take hints from dictionary if there is one
+ * or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+ const BYTE* const src, size_t const srcSize,
+ int const optLevel)
{
+ DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first block : init */
- if (srcSize <= 1024) /* heuristic */
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
+ DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
optPtr->priceType = zop_predef;
+ }
assert(optPtr->symbolCosts != NULL);
- if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
+ if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+ /* huffman table presumed generated by dictionary */
optPtr->priceType = zop_dynamic;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
{ unsigned lit;
@@ -206,11 +227,13 @@
{
if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
- return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
+ return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+ + optPtr->litLengthSumBasePrice
+ - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
}
}
/* ZSTD_litLengthContribution() :
* @return ( cost(litlength) - cost(0) )
@@ -251,11 +274,11 @@
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offset,
U32 const matchLength,
- const optState_t* const optPtr,
+ const optState_t* const optPtr,
int const optLevel)
{
U32 price;
U32 const offCode = ZSTD_highbit32(offset+1);
U32 const mlBase = matchLength - MINMATCH;
@@ -383,11 +406,10 @@
const U32 btLow = btMask >= current ? 0 : current - btMask;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
- U32 const matchLow = windowLow ? windowLow : 1;
U32 matchEndIdx = current+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
@@ -399,11 +421,12 @@
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = current; /* Update Hash Table */
- while (nbCompares-- && (matchIndex >= matchLow)) {
+ assert(windowLow > 0);
+ while (nbCompares-- && (matchIndex >= windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
@@ -477,28 +500,31 @@
const U32 mls, const ZSTD_dictMode_e dictMode)
{
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate;
- DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
+ DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
while(idx < target)
idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
ms->nextToUpdate = target;
}
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
- ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
+ ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
- U32 rep[ZSTD_REP_NUM], U32 const ll0,
- ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
+ U32 rep[ZSTD_REP_NUM],
+ U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+ ZSTD_match_t* matches,
+ const U32 lengthToBeat,
+ U32 const mls /* template */)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
@@ -540,10 +566,11 @@
size_t bestLength = lengthToBeat-1;
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
/* check repCode */
+ assert(ll0 <= 1); /* necessarily 1 or 0 */
{ U32 const lastR = ZSTD_REP_NUM + ll0;
U32 repCode;
for (repCode = ll0; repCode < lastR; repCode++) {
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
U32 const repIndex = current - repOffset;
@@ -722,11 +749,11 @@
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
- U32 const matchLengthSearch = cParams->searchLength;
+ U32 const matchLengthSearch = cParams->minMatch;
DEBUGLOG(8, "ZSTD_BtGetAllMatches");
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
@@ -772,16 +799,34 @@
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
return sol.litlen + sol.mlen;
}
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+ int const nbElts = lastEltID + 1;
+ int enb;
+ for (enb=0; enb < nbElts; enb++) {
+ (void)table;
+ //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
+ RAWLOG(2, "%4i,", table[enb]);
+ }
+ RAWLOG(2, " \n");
+}
+
+#endif
+
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
- const void* src, size_t srcSize,
- const int optLevel, const ZSTD_dictMode_e dictMode)
+ const void* src, size_t srcSize,
+ const int optLevel,
+ const ZSTD_dictMode_e dictMode)
{
optState_t* const optStatePtr = &ms->opt;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
@@ -790,18 +835,19 @@
const BYTE* const base = ms->window.base;
const BYTE* const prefixStart = base + ms->window.dictLimit;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
- U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
+ U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence;
/* init */
- DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
+ DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+ (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2);
ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart);
@@ -997,11 +1043,11 @@
U32 const llen = opt[storePos].litlen;
U32 const mlen = opt[storePos].mlen;
U32 const offCode = opt[storePos].off;
U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
- anchor - istart, llen, mlen);
+ anchor - istart, (unsigned)llen, (unsigned)mlen);
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
assert(storePos == storeEnd); /* must be last sequence */
ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
continue; /* will finish */
@@ -1045,15 +1091,15 @@
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
}
/* used in 2-pass strategy */
-static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
{
U32 s, sum=0;
- assert(ZSTD_FREQ_DIV+bonus > 0);
- for (s=0; s<=lastEltIndex; s++) {
+ assert(ZSTD_FREQ_DIV+bonus >= 0);
+ for (s=0; s<lastEltIndex+1; s++) {
table[s] <<= ZSTD_FREQ_DIV+bonus;
table[s]--;
sum += table[s];
}
return sum;
@@ -1061,47 +1107,82 @@
/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
+ optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+ optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+ optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
}
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its constract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize)
+{
+ U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
+ memcpy(tmpRep, rep, sizeof(tmpRep));
+
+ DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+ assert(ms->opt.litLengthSum == 0); /* first block */
+ assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
+ assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
+ assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
+
+ ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
+
+ /* invalidate first scan from history */
+ ZSTD_resetSeqStore(seqStore);
+ ms->window.base -= srcSize;
+ ms->window.dictLimit += (U32)srcSize;
+ ms->window.lowLimit = ms->window.dictLimit;
+ ms->nextToUpdate = ms->window.dictLimit;
+ ms->nextToUpdate3 = ms->window.dictLimit;
+
+ /* re-inforce weight of collected statistics */
+ ZSTD_upscaleStats(&ms->opt);
+}
+
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-#if 0
- /* 2-pass strategy (disabled)
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btultra2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize)
+{
+ U32 const current = (U32)((const BYTE*)src - ms->window.base);
+ DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+ /* 2-pass strategy:
* this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it.
+ * After 1st pass, function forgets everything, and starts a new block.
+ * Consequently, this can only work if no data has been previously loaded in tables,
+ * aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block),
* the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
- && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
- && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
- U32 tmpRep[ZSTD_REP_NUM];
- DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
- assert(ms->nextToUpdate >= ms->window.dictLimit
- && ms->nextToUpdate <= ms->window.dictLimit + 1);
- memcpy(tmpRep, rep, sizeof(tmpRep));
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
- ZSTD_resetSeqStore(seqStore);
- /* invalidate first scan from history */
- ms->window.base -= srcSize;
- ms->window.dictLimit += (U32)srcSize;
- ms->window.lowLimit = ms->window.dictLimit;
- ms->nextToUpdate = ms->window.dictLimit;
- ms->nextToUpdate3 = ms->window.dictLimit;
- /* re-inforce weight of collected statistics */
- ZSTD_upscaleStats(&ms->opt);
+ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
+ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
+ && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
+ && (srcSize > ZSTD_PREDEF_THRESHOLD)
+ ) {
+ ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
}
-#endif
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -1128,5 +1209,9 @@
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
}
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is only specific for the first block (no prefix) */