ext/zstdruby/libzstd/compress/zstd_compress_internal.h in zstd-ruby-1.5.0.0 vs ext/zstdruby/libzstd/compress/zstd_compress_internal.h in zstd-ruby-1.5.1.0

- old
+ new

@@ -61,11 +61,11 @@ ZSTD_dictContentType_e dictContentType; ZSTD_CDict* cdict; } ZSTD_localDict; typedef struct { - HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; HUF_repeat repeatMode; } ZSTD_hufCTables_t; typedef struct { FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; @@ -177,11 +177,11 @@ U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; } optState_t; typedef struct { ZSTD_entropyCTables_t entropy; U32 rep[ZSTD_REP_NUM]; @@ -197,10 +197,12 @@ * ZSTD_window_init(). Useful for debugging coredumps * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. */ } ZSTD_window_t; +#define ZSTD_WINDOW_START_INDEX 2 + typedef struct ZSTD_matchState_t ZSTD_matchState_t; #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ struct ZSTD_matchState_t { @@ -262,11 +264,11 @@ size_t splitIndices[LDM_BATCH_SIZE]; ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; } ldmState_t; typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ + ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ U32 hashLog; /* Log size of hashTable */ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ U32 hashRateLog; /* Log number of entries to skip */ U32 windowLog; /* Window log for the LDM */ @@ -293,11 +295,11 @@ int srcSizeHint; /* User's best guess of source size. * Hint is not valid when srcSizeHint == 0. * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; /* Multithreading: used to pass parameters to mtctx */ int nbWorkers; size_t jobSize; int overlapLog; @@ -316,14 +318,14 @@ /* Sequence compression API */ ZSTD_sequenceFormat_e blockDelimiters; int validateSequences; /* Block splitting */ - int splitBlocks; + ZSTD_paramSwitch_e useBlockSplitter; /* Param for deciding whether to use row-based matchfinder */ - ZSTD_useRowMatchFinderMode_e useRowMatchFinder; + ZSTD_paramSwitch_e useRowMatchFinder; /* Always load a dictionary in ext-dict mode (not prefix mode)? */ int deterministicRefPrefix; /* Internal use, for createCCtxParams() and freeCCtxParams() only */ @@ -341,10 +343,26 @@ typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; +/** + * Struct that contains all elements of block splitter that should be allocated + * in a wksp. + */ +#define ZSTD_MAX_NB_BLOCK_SPLITS 196 +typedef struct { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + seqStore_t currSeqStore; + seqStore_t nextSeqStore; + + U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; + ZSTD_entropyCTablesMetadata_t entropyMetadata; +} ZSTD_blockSplitCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ ZSTD_CCtx_params requestedParams; @@ -372,11 +390,11 @@ size_t maxNbLdmSequences; rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ ZSTD_blockState_t blockState; U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ - /* Wether we are streaming or not */ + /* Whether we are streaming or not */ ZSTD_buffered_policy_e bufferedPolicy; /* streaming */ char* inBuff; size_t inBuffSize; @@ -406,10 +424,13 @@ /* Tracing */ #if ZSTD_TRACE ZSTD_TraceCtx traceCtx; #endif + + /* Workspace for block splitter */ + ZSTD_blockSplitCtx blockSplitCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; typedef enum { @@ -440,11 +461,11 @@ } ZSTD_cParamMode_e; typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, @@ -547,21 +568,21 @@ ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); return (srcSize >> minlog) + 2; } -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) { switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: + case ZSTD_ps_enable: return 0; - case ZSTD_lcm_uncompressed: + case ZSTD_ps_disable: return 1; default: assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: + ZSTD_FALLTHROUGH; + case ZSTD_ps_auto: return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); } } /*! ZSTD_safecopyLiterals() : @@ -649,12 +670,18 @@ if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) # if STATIC_BMI2 return _tzcnt_u64(val) >> 3; # else - unsigned long r = 0; - return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; + if (val != 0) { + unsigned long r; + _BitScanForward64(&r, (U64)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } # endif # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_ctzll((U64)val) >> 3); # else static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, @@ -667,12 +694,18 @@ 7, 2, 6, 5, 7, 6, 7, 7 }; return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; # endif } else { /* 32 bits */ # if defined(_MSC_VER) - unsigned long r=0; - return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; + if (val != 0) { + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_ctz((U32)val) >> 3); # else static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, @@ -685,12 +718,18 @@ if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) # if STATIC_BMI2 return _lzcnt_u64(val) >> 3; # else - unsigned long r = 0; - return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; + if (val != 0) { + unsigned long r; + _BitScanReverse64(&r, (U64)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } # endif # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_clzll(val) >> 3); # else unsigned r; @@ -700,12 +739,18 @@ r += (!val); return r; # endif } else { /* 32 bits */ # if defined(_MSC_VER) - unsigned long r = 0; - return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, (unsigned long)val); + return (unsigned)(r >> 3); + } else { + /* Should not reach this code path */ + __assume(0); + } # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_clz((U32)val) >> 3); # else unsigned r; if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } @@ -882,13 +927,13 @@ window->dictLimit = end; } MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) { - return window.dictLimit == 1 && - window.lowLimit == 1 && - (window.nextSrc - window.base) == 1; + return window.dictLimit == ZSTD_WINDOW_START_INDEX && + window.lowLimit == ZSTD_WINDOW_START_INDEX && + (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; } /** * ZSTD_window_hasExtDict(): * Returns non-zero if the window has a non-empty extDict. @@ -935,11 +980,13 @@ U32 loadedDictEnd, void const* src) { U32 const cycleSize = 1u << cycleLog; U32 const curr = (U32)((BYTE const*)src - window.base); - U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize); + U32 const minIndexToOverflowCorrect = cycleSize + + MAX(maxDist, cycleSize) + + ZSTD_WINDOW_START_INDEX; /* Adjust the min index to backoff the overflow correction frequency, * so we don't waste too much CPU in overflow correction. If this * computation overflows we don't really care, we just need to make * sure it is at least minIndexToOverflowCorrect. @@ -1010,14 +1057,18 @@ * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. */ U32 const cycleSize = 1u << cycleLog; U32 const cycleMask = cycleSize - 1; U32 const curr = (U32)((BYTE const*)src - window->base); - U32 const currentCycle0 = curr & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0; - U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize); + U32 const currentCycle = curr & cycleMask; + /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ + U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX + ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) + : 0; + U32 const newCurrent = currentCycle + + currentCycleCorrection + + MAX(maxDist, cycleSize); U32 const correction = curr - newCurrent; /* maxDist must be a power of two so that: * (newCurrent & cycleMask) == (curr & cycleMask) * This is required to not corrupt the chains / binary tree. */ @@ -1029,18 +1080,24 @@ assert(correction > 1<<28); } window->base += correction; window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; + if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->lowLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->lowLimit -= correction; + } + if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->dictLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->dictLimit -= correction; + } /* Ensure we can still reference the full window. */ assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); + assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX); /* Ensure that lowLimit and dictLimit didn't underflow. */ assert(window->lowLimit <= newCurrent); assert(window->dictLimit <= newCurrent); ++window->nbOverflowCorrections; @@ -1147,15 +1204,16 @@ } } } } MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { ZSTD_memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ + window->base = (BYTE const*)" "; + window->dictBase = (BYTE const*)" "; + ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ + window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ + window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ window->nbOverflowCorrections = 0; } /** * ZSTD_window_update(): @@ -1204,18 +1262,18 @@ /** * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. */ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't * valid for the entire block. So this check is sufficient to find the lowest valid match index. */ - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; return matchLowest; } /** * Returns the lowest allowed match index in the prefix.