contrib/zstd/lib/decompress/huf_decompress.c in extzstd-0.1.1 vs contrib/zstd/lib/decompress/huf_decompress.c in extzstd-0.2

- old
+ new

@@ -31,44 +31,34 @@ - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ /* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - -/* ************************************************************** * Dependencies ****************************************************************/ #include <string.h> /* memcpy, memset */ #include "bitstream.h" /* BIT_* */ +#include "compiler.h" #include "fse.h" /* header compression */ #define HUF_STATIC_LINKING_ONLY #include "huf.h" +#include "error_private.h" /* ************************************************************** * Error Management ****************************************************************/ +#define HUF_isError ERR_isError #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + /*-***************************/ /* generic DTableDesc */ /*-***************************/ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; @@ -85,20 +75,32 @@ /* single-symbol decoding */ /*-***************************/ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { - BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; - U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; U32 nbSymbols = 0; size_t iSize; void* const dtPtr = DTable + 1; HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32* rankVal; + BYTE* huffWeight; + size_t spaceUsed32 = 0; + + rankVal = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; + huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) + return ERROR(tableLog_tooLarge); + workSpace = (U32 *)workSpace + spaceUsed32; + wkspSize -= (spaceUsed32 << 2); + HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; @@ -133,11 +135,18 @@ } } return iSize; } +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ BYTE const c = dt[val].byte; BIT_skipBits(Dstream, dt[val].nbBits); @@ -153,11 +162,11 @@ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ if (MEM_64bits()) \ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) -FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) { BYTE* const pStart = p; /* up to 4 symbols at a time */ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) { @@ -210,22 +219,33 @@ DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 0) return ERROR(GENERIC); return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); } -size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); } + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); } @@ -333,22 +353,32 @@ if (dtd.tableType != 0) return ERROR(GENERIC); return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); } -size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); + size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize, + workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); } + +size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } @@ -401,11 +431,12 @@ rankVal[weight] += length; } } } -typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) @@ -445,25 +476,48 @@ } } rankVal[weight] += length; } } -size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src, + size_t srcSize, void* workSpace, + size_t wkspSize) { - BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; - sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; - U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 }; - U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 }; - U32* const rankStart = rankStart0+1; - rankVal_t rankVal; U32 tableLog, maxW, sizeOfSort, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); U32 const maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; + U32 *rankStart; + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) + return ERROR(tableLog_tooLarge); + workSpace = (U32 *)workSpace + spaceUsed32; + wkspSize -= (spaceUsed32 << 2); + + rankStart = rankStart0 + 1; + memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); @@ -525,10 +579,16 @@ dtd.tableType = 1; memcpy(DTable, &dtd, sizeof(dtd)); return iSize; } +size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX4_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ memcpy(op, dt+val, 2); @@ -543,11 +603,12 @@ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); else { if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { BIT_skipBits(DStream, dt[val].nbBits); if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); } } return 1; } @@ -560,11 +621,11 @@ #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ if (MEM_64bits()) \ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) -FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) { BYTE* const pStart = p; /* up to 8 symbols at a time */ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { @@ -624,22 +685,34 @@ DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 1) return ERROR(GENERIC); return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); } -size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize); + size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); } + +size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } @@ -746,22 +819,34 @@ if (dtd.tableType != 1) return ERROR(GENERIC); return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); } -size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { const BYTE* ip = (const BYTE*) cSrc; - size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize); + size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); } + +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } @@ -814,15 +899,15 @@ /** HUF_selectDecoder() : * Tells which decoder is likely to decode faster, * based on a set of pre-determined metrics. * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ +* Assumption : 0 < cSrcSize, dstSize <= 128 KB */ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { /* decoder timing evaluation */ - U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ + U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ U32 const D256 = (U32)(dstSize >> 8); U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ @@ -859,30 +944,53 @@ return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; } } -size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); - if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): + HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); } } -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) { /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); } +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); }