contrib/zstd/lib/zstd.h in extzstd-0.3 vs contrib/zstd/lib/zstd.h in extzstd-0.3.1
- old
+ new
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
@@ -13,10 +13,11 @@
#ifndef ZSTD_H_235446
#define ZSTD_H_235446
/* ====== Dependency ======*/
+#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
#ifndef ZSTDLIB_VISIBILITY
@@ -69,31 +70,31 @@
*******************************************************************************/
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
-#define ZSTD_VERSION_RELEASE 0
+#define ZSTD_VERSION_RELEASE 5
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
#define ZSTD_QUOTE(str) #str
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
-/***************************************
-* Default constant
-***************************************/
+/* *************************************
+ * Default constant
+ ***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3
#endif
-/***************************************
-* Constants
-***************************************/
+/* *************************************
+ * Constants
+ ***************************************/
/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
@@ -181,21 +182,30 @@
/***************************************
* Explicit context
***************************************/
/*= Compression context
* When compressing many times,
- * it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ * it is recommended to allocate a context just once,
+ * and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
- * Use one context per thread for parallel execution in multi-threaded environments. */
+ * Note : re-using context is just a speed / resource optimization.
+ * It doesn't change the compression ratio, which remains identical.
+ * Note 2 : In multi-threaded environments,
+ * use one different context per thread for parallel execution.
+ */
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
/*! ZSTD_compressCCtx() :
- * Same as ZSTD_compress(), using an explicit ZSTD_CCtx
- * The function will compress at requested compression level,
- * ignoring any other parameter */
+ * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ * Important : in order to behave similarly to `ZSTD_compress()`,
+ * this function compresses at requested compression level,
+ * __ignoring any other parameter__ .
+ * If any advanced parameter was set using the advanced API,
+ * they will all be reset. Only `compressionLevel` remains.
+ */
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
@@ -226,11 +236,11 @@
/* API design :
* Parameters are pushed one by one into an existing context,
* using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()
+ * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
*
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
*
* This API supercedes all other "advanced" API entry points in the experimental section.
* In the future, we expect to remove from experimental API entry points which are redundant with this API.
@@ -254,39 +264,50 @@
typedef enum {
/* compression parameters
* Note: When compressing with a ZSTD_CDict these parameters are superseded
- * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
- * for more info (superseded-by-cdict). */
- ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
+ * by the parameters used to construct the ZSTD_CDict.
+ * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+ ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+ * Note that exact compression parameters are dynamically determined,
+ * depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
- * Note 2 : setting a level sets all default values of other compression parameters */
+ * Note 2 : setting a level does not automatically set all other compression parameters
+ * to default. Setting this will however eventually dynamically impact the compression
+ * parameters which have not been manually set. The manually set
+ * ones will 'stick'. */
+ /* Advanced compression parameters :
+ * It's possible to pin down compression parameters to some specific values.
+ * In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
+ * This will set a memory budget for streaming decompression,
+ * with larger values requiring more memory
+ * and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
- * requires explicitly allowing such window size at decompression stage if using streaming. */
+ * requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
* Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
* Larger tables improve compression ratio of strategies <= dFast,
* and improve speed of strategies > dFast.
* Special: value 0 means "use default hashLog". */
ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2.
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
- * This parameter is useless when using "fast" strategy.
+ * This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
- * This parameter is useless when using "fast" and "dFast" strategies.
+ * This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, /* Minimum size of searched matches.
* Note that Zstandard can still find matches of smaller size,
* it just tweaks its search algorithm to look for this size and larger.
* Larger values increase compression and decompression speed, but decrease ratio.
@@ -337,11 +358,11 @@
/* frame parameters */
ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
- * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+ * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
/* multi-threading parameters */
/* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
@@ -356,11 +377,11 @@
* Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
- * The minimum size is automatically and transparently enforced */
+ * The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
* This value is enforced only when nbWorkers >= 1.
* Larger values increase compression ratio, but decrease speed.
@@ -378,19 +399,23 @@
* ZSTD_c_rsyncable
* ZSTD_c_format
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
+ * ZSTD_c_targetCBlockSize
+ * ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
*/
ZSTD_c_experimentalParam1=500,
ZSTD_c_experimentalParam2=10,
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
+ ZSTD_c_experimentalParam6=1003,
+ ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
typedef struct {
size_t error;
int lowerBound;
@@ -495,15 +520,17 @@
* Special: value 0 means "use default maximum windowLog". */
/* note : additional experimental parameters are also available
* within the experimental section of the API.
* At the time of this writing, they include :
- * ZSTD_c_format
+ * ZSTD_d_format
+ * ZSTD_d_stableOutBuffer
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
- ZSTD_d_experimentalParam1=1000
+ ZSTD_d_experimentalParam1=1000,
+ ZSTD_d_experimentalParam2=1001
} ZSTD_dParameter;
/*! ZSTD_dParam_getBounds() :
* All parameters must belong to an interval with lower and upper bounds,
@@ -655,38 +682,54 @@
ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that performance ends being spent more into the interface, rather than compression itself.
+ * In which cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the nb of roundtrips.
+ */
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
-/*******************************************************************************
- * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
- * ZSTD_compressStream2(). It is redundant, but is still fully supported.
+
+/* *****************************************************************************
+ * This following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/
-/**
+/*!
* Equivalent to:
*
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
-/**
+/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
- * returns the number of bytes left to flush (if non-zero and not an error).
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/*-***************************************************************************
* Streaming decompression - HowTo
@@ -723,11 +766,11 @@
/*===== Streaming decompression functions =====*/
/* This function is redundant with the advanced API and equivalent to:
*
- * ZSTD_DCtx_reset(zds);
+ * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_refDDict(zds, NULL);
*/
ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
@@ -768,16 +811,21 @@
* Bulk processing dictionary API
**********************************/
typedef struct ZSTD_CDict_s ZSTD_CDict;
/*! ZSTD_createCDict() :
- * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+ * When compressing multiple messages or blocks using the same dictionary,
+ * it's recommended to digest the dictionary only once, since it's a costly operation.
+ * ZSTD_createCDict() will create a state from digesting a dictionary.
+ * The resulting state can be used for future compression operations with very limited startup cost.
* ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ * in which case the only thing that it transports is the @compressionLevel.
+ * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
/*! ZSTD_freeCDict() :
* Function frees memory allocated by ZSTD_createCDict(). */
@@ -900,11 +948,11 @@
* ensure that the window size is large enough to contain the entire source.
* See ZSTD_c_windowLog.
* Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* It's a CPU consuming operation, with non-negligible impact on latency.
* If there is a need to use the same prefix multiple times, consider loadDictionary instead.
- * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
+ * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
* Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
const void* prefix, size_t prefixSize);
/*! ZSTD_DCtx_loadDictionary() :
@@ -944,11 +992,11 @@
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
* Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
* Prefix buffer must remain unmodified up to the end of frame,
* reached when ZSTD_decompressStream() returns 0.
- * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
* A full dictionary is more costly, as it requires building tables.
*/
ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
@@ -967,11 +1015,11 @@
ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */
-/****************************************************************************************
+/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
* They are provided for advanced scenarios.
* They should never be used with a dynamic library, as prototypes may change in the future.
@@ -989,12 +1037,12 @@
* They can still change in future versions.
* Some of them are planned to remain in the static_only section indefinitely.
* Some of them might be removed in the future (especially when redundant with existing stable functions)
* ***************************************************************************************/
-#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */
-#define ZSTD_FRAMEHEADERSIZE_MIN 6
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
#define ZSTD_SKIPPABLEHEADERSIZE 8
/* compression parameter bounds */
#define ZSTD_WINDOWLOG_MAX_32 30
@@ -1035,19 +1083,43 @@
#define ZSTD_LDM_BUCKETSIZELOG_MIN 1
#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
/* internal */
#define ZSTD_HASHLOG3_MAX 17
/* --- Advanced types --- */
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
+ unsigned int matchPos; /* Match pos in dst */
+ /* If seqDef.offset > 3, then this is seqDef.offset - 3
+ * If seqDef.offset < 3, then this is the corresponding repeat offset
+ * But if seqDef.offset < 3 and litLength == 0, this is the
+ * repeat offset before the corresponding repeat offset
+ * And if seqDef.offset == 3 and litLength == 0, this is the
+ * most recent repeat offset - 1
+ */
+ unsigned int offset;
+ unsigned int litLength; /* Literal length */
+ unsigned int matchLength; /* Match length */
+ /* 0 when seq not rep and seqDef.offset otherwise
+ * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+ */
+ unsigned int rep;
+} ZSTD_Sequence;
+
+typedef struct {
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
unsigned hashLog; /**< dispatch table : larger == faster, more memory */
unsigned searchLog; /**< nb of searches : larger == more compression, slower */
unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
@@ -1072,36 +1144,27 @@
ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
} ZSTD_dictContentType_e;
typedef enum {
ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
- ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
} ZSTD_dictLoadMethod_e;
typedef enum {
- /* Opened question : should we have a format ZSTD_f_auto ?
- * Today, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats become supported,
- * on the compression side, it would mean "default format".
- * On the decompression side, it would mean "automatic format detection",
- * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
- * Since meaning is a little different, another option could be to define different enums for compression and decompression.
- * This question could be kept for later, when there are actually multiple formats to support,
- * but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
- ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number.
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
* Useful to save 4 bytes per generated frame.
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
typedef enum {
/* Note: this enum and the behavior it controls are effectively internal
* implementation details of the compressor. They are expected to continue
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
- * Zstd currently supports the use of a CDict in two ways:
+ * Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
* while operating on a single set of internal tables. This makes
* the compression faster per-byte of input. However, the initial copy of
@@ -1113,27 +1176,34 @@
* slower per input byte, because the compressor has to search two sets of
* tables. However, this model incurs no start-up cost (as long as the
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
+ * - The CDict's tables are not used at all, and instead we use the working
+ * context alone to reload the dictionary and use params based on the source
+ * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+ * This method is effective when the dictionary sizes are very small relative
+ * to the input size, and the input size is fairly large to begin with.
+ *
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
* this enum.
*/
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
+ ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
} ZSTD_dictAttachPref_e;
typedef enum {
ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level.
* Negative compression levels will be uncompressed, and positive compression
* levels will be compressed. */
ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
* emitted if Huffman compression is not profitable. */
- ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */
+ ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
/***************************************
* Frame size functions
@@ -1160,11 +1230,11 @@
* note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
* read each contained frame header. This is fast as most of the data is skipped,
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
-/** ZSTD_decompressBound() :
+/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
* @return : - upper-bound for the decompressed size of all data in all successive frames
* - if an error occured: ZSTD_CONTENTSIZE_ERROR
@@ -1181,24 +1251,47 @@
* srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
* @return : size of the Frame Header,
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+/*! ZSTD_getSequences() :
+ * Extract sequences from the sequence store
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ * @return : number of sequences extracted
+ */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+
/***************************************
* Memory management
***************************************/
/*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage
* of a future {D,C}Ctx, before its creation.
- * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
- * It will also consider src size to be arbitrarily "large", which is worst case.
- * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- * Note : CCtx size estimation is only correct for single-threaded compression. */
+ *
+ * ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ * for any compression level up to selected one.
+ * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ * does not include space for a window buffer.
+ * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ * The estimate will assume the input may be arbitrarily large,
+ * which is the worst case.
+ *
+ * When srcSize can be bound by a known and rather "small" value,
+ * this fact can be used to provide a tighter estimation
+ * because the CCtx compression context will need less memory.
+ * This tighter estimation can be provided by more advanced functions
+ * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ * Note 2 : only single-threaded compression is supported.
+ * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
@@ -1305,11 +1398,12 @@
/*! ZSTD_createCDict_byReference() :
* Create a digested dictionary for compression
* Dictionary content is just referenced, not duplicated.
* As a consequence, `dictBuffer` **must** outlive CDict,
- * and its content must remain unmodified throughout the lifetime of CDict. */
+ * and its content must remain unmodified throughout the lifetime of CDict.
+ * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
* `estimatedSrcSize` value is optional, select 0 if not known */
@@ -1332,19 +1426,23 @@
* cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
* This function never fails (wide contract) */
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_compress_advanced() :
- * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */
+ * Note : this function is now DEPRECATED.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params);
/*! ZSTD_compress_usingCDict_advanced() :
- * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
+ * Note : this function is now REDUNDANT.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ * This prototype will be marked as deprecated and generate compilation warning in some future version */
ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fParams);
@@ -1407,10 +1505,21 @@
* The value must be of type ZSTD_literalCompressionMode_e.
* See ZSTD_literalCompressionMode_t enum definition for details.
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
* @return : 0, or an error code (which can be tested with ZSTD_isError()).
*/
@@ -1541,10 +1650,41 @@
/* ZSTD_d_format
* experimental parameter,
* allowing selection between ZSTD_format_e input compression formats
*/
#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flags is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
/*! ZSTD_DCtx_setFormat() :
* Instruct the decoder context about what kind of data to decode next.
* This instruction is mandatory to decode data without a fully-formed header,
* such ZSTD_f_zstd1_magicless for example.
@@ -1579,56 +1719,85 @@
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
*
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ int compressionLevel,
+ unsigned long long pledgedSrcSize);
+
/**! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
* Creates of an internal CDict (incompatible with static CCtx), except if
* dict == NULL or dictSize < 8, in which case no dict is used.
- * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
* it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ int compressionLevel);
+
/**! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
+ * // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ * for ((param, value) : params) {
+ * ZSTD_CCtx_setParameter(zcs, param, value);
+ * }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
- * pledgedSrcSize must be correct. If srcSize is not known at init time, use
- * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
- ZSTD_parameters params, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params,
+ unsigned long long pledgedSrcSize);
+
/**! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
/**! ZSTD_initCStream_usingCDict_advanced() :
- * This function is deprecated, and is approximately equivalent to:
+ * This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
+ * // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ * for ((fParam, value) : fParams) {
+ * ZSTD_CCtx_setParameter(zcs, fParam, value);
+ * }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
-ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+ const ZSTD_CDict* cdict,
+ ZSTD_frameParameters fParams,
+ unsigned long long pledgedSrcSize);
/*! ZSTD_resetCStream() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
@@ -1639,10 +1808,11 @@
* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
* If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
* but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError())
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
typedef struct {
@@ -1684,27 +1854,32 @@
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
/**
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_refDDict(zds, ddict);
*
* note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
/**
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
* re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
/*********************************************************************
@@ -1841,11 +2016,11 @@
unsigned headerSize;
unsigned dictID;
unsigned checksumFlag;
} ZSTD_frameHeader;
-/** ZSTD_getFrameHeader() :
+/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
@@ -1874,27 +2049,29 @@
/** Block level API */
/* ============================ */
/*!
Block functions produce and decode raw zstd blocks, without frame metadata.
- Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
- User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+ Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
A few rules to respect :
- Compressing and decompressing require a context structure
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
- It is necessary to init context before starting
+ compression : any ZSTD_compressBegin*() variant, including with dictionary
+ decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+ copyCCtx() and copyDCtx() can be used too
- Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+ If input is larger than a block size, it's necessary to split input data into multiple blocks
- + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
- Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
- In which case, nothing is produced into `dst` !
- + User must test for such outcome and deal directly with uncompressed data
- + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+ + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+ - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+ ===> In which case, nothing is produced into `dst` !
+ + User __must__ test for such outcome and deal directly with uncompressed data
+ + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+ Doing so would mess up with statistics history, leading to potential data corruption.
+ + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+ In case of multiple successive blocks, should some of them be uncompressed,
decoder must be informed of their existence in order to follow proper history.
Use ZSTD_insertBlock() for such a case.
*/