contrib/zstd/lib/compress/fse_compress.c in extzstd-0.3.2 vs contrib/zstd/lib/compress/fse_compress.c in extzstd-0.3.3
- old
+ new
@@ -1,8 +1,8 @@
/* ******************************************************************
* FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
@@ -24,10 +24,11 @@
#include "../common/fse.h"
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
* Error Management
****************************************************************/
@@ -73,56 +74,97 @@
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
+ U32 const maxSV1 = maxSymbolValue+1;
- U32* cumul = (U32*)workSpace;
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
U32 highThreshold = tableSize-1;
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
#endif
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
- for (u=1; u <= maxSymbolValue+1; u++) {
+ for (u=1; u <= maxSV1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+ assert(normalizedCounter[u-1] >= 0);
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
} }
- cumul[maxSymbolValue+1] = tableSize+1;
+ cumul[maxSV1] = (U16)(tableSize+1);
}
/* Spread symbols */
- { U32 position = 0;
+ if (highThreshold == tableSize - 1) {
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
+ * to reduce branch misses since we are operating on a small block
+ */
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+ { U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ assert(n>=0);
+ pos += (size_t)n;
+ }
+ }
+ /* Spread symbols across the table. Lack of lowprob symbols means that
+ * we don't need variable sized inner loop, so we can unroll the loop and
+ * reduce branch misses.
+ */
+ { size_t position = 0;
+ size_t s;
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableSymbol[uPosition] = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0); /* Must have initialized all positions */
+ }
+ } else {
+ U32 position = 0;
U32 symbol;
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+ for (symbol=0; symbol<maxSV1; symbol++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
position = (position + step) & tableMask;
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }
-
assert(position==0); /* Must have initialized all positions */
}
/* Build table */
{ U32 u; for (u=0; u<tableSize; u++) {
@@ -142,56 +184,51 @@
break;
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
- symbolTT[s].deltaFindState = total - 1;
+ assert(total <= INT_MAX);
+ symbolTT[s].deltaFindState = (int)(total - 1);
total ++;
break;
default :
- {
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+ assert(normalizedCounter[s] > 1);
+ { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
- total += normalizedCounter[s];
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+ total += (unsigned)normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
DEBUGLOG(5, "\n --- table statistics : ");
{ U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
- }
- }
+ } }
#endif
return 0;
}
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
- return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
-}
-#endif
-
#ifndef FSE_COMMONDEFS_ONLY
-
/*-**************************************************************
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ + 4 /* bitCount initialized at 4 */
+ + 2 /* first two symbols may use one additional bit each */) / 8)
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
static size_t
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
@@ -304,33 +341,23 @@
/*-**************************************************************
* FSE Compression Code
****************************************************************/
-FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
-{
- size_t size;
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
- return (FSE_CTable*)ZSTD_malloc(size);
-}
-
-void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
-
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
- U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+ U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
+ U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
}
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+ U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
@@ -494,44 +521,10 @@
#endif
return tableLog;
}
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
-{
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- void* const ptr = ct;
- U16* const tableU16 = ( (U16*) ptr) + 2;
- void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
- FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* header */
- tableU16[-2] = (U16) nbBits;
- tableU16[-1] = (U16) maxSymbolValue;
-
- /* Build table */
- for (s=0; s<tableSize; s++)
- tableU16[s] = (U16)(tableSize + s);
-
- /* Build Symbol Transformation Table */
- { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
- for (s=0; s<=maxSymbolValue; s++) {
- symbolTT[s].deltaNbBits = deltaNbBits;
- symbolTT[s].deltaFindState = s-1;
- } }
-
- return 0;
-}
-
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
void* ptr = ct;
U16* tableU16 = ( (U16*) ptr) + 2;
@@ -625,81 +618,7 @@
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
}
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-/* FSE_compress_wksp() :
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` size must be `(1<<tableLog)`.
- */
-size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + dstSize;
-
- unsigned count[FSE_MAX_SYMBOL_VALUE+1];
- S16 norm[FSE_MAX_SYMBOL_VALUE+1];
- FSE_CTable* CTable = (FSE_CTable*)workSpace;
- size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
- void* scratchBuffer = (void*)(CTable + CTableSize);
- size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
-
- /* init conditions */
- if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
- if (srcSize <= 1) return 0; /* Not compressible */
- if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
- if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
-
- /* Scan input and build symbol stats */
- { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
- if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
- if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
- if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
- }
-
- tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
-
- /* Write table description header */
- { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
- op += nc_err;
- }
-
- /* Compress */
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
- if (cSize == 0) return 0; /* not enough space for compressed data */
- op += cSize;
- }
-
- /* check compressibility */
- if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
-
- return op-ostart;
-}
-
-typedef struct {
- FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
- union {
- U32 hist_wksp[HIST_WKSP_SIZE_U32];
- BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
- } workspace;
-} fseWkspMax_t;
-
-size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
-{
- fseWkspMax_t scratchBuffer;
- DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
- return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
-}
-
-size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
-}
-#endif
#endif /* FSE_COMMONDEFS_ONLY */