contrib/zstd/lib/dictBuilder/cover.c in extzstd-0.3.2 vs contrib/zstd/lib/dictBuilder/cover.c in extzstd-0.3.3
- old
+ new
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
@@ -24,31 +24,40 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
#include "../common/mem.h" /* read */
#include "../common/pool.h"
#include "../common/threading.h"
-#include "cover.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../common/bits.h" /* ZSTD_highbit32 */
+#include "../zdict.h"
+#include "cover.h"
/*-*************************************
* Constants
***************************************/
+/**
+* There are 32bit indexes used to ref samples, so limit samples size to 4GB
+* on 64bit builds.
+* For 32bit builds we choose 1 GB.
+* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
+* contiguous buffer, so 1GB is already a high limit.
+*/
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define COVER_DEFAULT_SPLITPOINT 1.0
/*-*************************************
* Console display
***************************************/
#ifndef LOCALDISPLAYLEVEL
-static int g_displayLevel = 2;
+static int g_displayLevel = 0;
#endif
#undef DISPLAY
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
@@ -531,11 +540,11 @@
}
}
/**
* Prepare a context for dictionary building.
- * The context is only dependent on the parameter `d` and can used multiple
+ * The context is only dependent on the parameter `d` and can be used multiple
* times.
* Returns 0 on success or error code on error.
* The context must be destroyed with `COVER_ctx_destroy()`.
*/
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
@@ -636,11 +645,11 @@
return 0;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
{
- const double ratio = (double)nbDmers / maxDictSize;
+ const double ratio = (double)nbDmers / (double)maxDictSize;
if (ratio >= 10) {
return;
}
LOCALDISPLAYLEVEL(displayLevel, 1,
"WARNING: The maximum dictionary size %u is too large "
@@ -732,11 +741,11 @@
BYTE* const dict = (BYTE*)dictBuffer;
COVER_ctx_t ctx;
COVER_map_t activeDmers;
parameters.splitPoint = 1.0;
/* Initialize global data */
- g_displayLevel = parameters.zParams.notificationLevel;
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
/* Checks */
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
return ERROR(parameter_outOfBound);
}
@@ -940,13 +949,21 @@
}
ZSTD_pthread_mutex_unlock(&best->mutex);
}
}
+static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
+{
+ COVER_dictSelection_t ds;
+ ds.dictContent = buf;
+ ds.dictSize = s;
+ ds.totalCompressedSize = csz;
+ return ds;
+}
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
- COVER_dictSelection_t selection = { NULL, 0, error };
- return selection;
+ return setDictSelection(NULL, 0, error);
}
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
}
@@ -995,13 +1012,12 @@
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
if (params.shrinkDict == 0) {
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
free(candidateDictBuffer);
- return selection;
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
}
largestDict = dictContentSize;
largestCompressed = totalCompressedSize;
dictContentSize = ZDICT_DICTSIZE_MIN;
@@ -1029,24 +1045,20 @@
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
free(largestDictbuffer);
- return selection;
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
}
dictContentSize *= 2;
}
dictContentSize = largestDict;
totalCompressedSize = largestCompressed;
- {
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
- free(candidateDictBuffer);
- return selection;
- }
+ free(candidateDictBuffer);
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
}
/**
* Parameters for COVER_tryParameters().
*/
@@ -1060,22 +1072,23 @@
/**
* Tries a set of parameters and updates the COVER_best_t with the results.
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
-static void COVER_tryParameters(void *opaque) {
+static void COVER_tryParameters(void *opaque)
+{
/* Save parameters as local variables */
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
const COVER_ctx_t *const ctx = data->ctx;
const ZDICT_cover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Allocate space for hash table, dict, and freqs */
COVER_map_t activeDmers;
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
goto _cleanup;
}
if (!dict || !freqs) {
@@ -1101,18 +1114,17 @@
free(dict);
COVER_best_finish(data->best, parameters, selection);
free(data);
COVER_map_destroy(&activeDmers);
COVER_dictSelectionFree(selection);
- if (freqs) {
- free(freqs);
- }
+ free(freqs);
}
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_cover_params_t *parameters) {
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
+ const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t* parameters)
+{
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;