Sha256: f38210fef644dc1afe7c946ee85c29c546cd785c9501d125c1c0046664550fcc

Contents?: true

Size: 1.82 KB

Versions: 16

Compression:

Stored size: 1.82 KB

Contents

package compress

import "math"

// Estimate returns a normalized compressibility estimate of block b.
// Values close to zero are likely uncompressible.
// Values above 0.1 are likely to be compressible.
// Values above 0.5 are very compressible.
// Very small lengths will return 0.
func Estimate(b []byte) float64 {
	if len(b) < 16 {
		return 0
	}

	// Correctly predicted order 1
	hits := 0
	lastMatch := false
	var o1 [256]byte
	var hist [256]int
	c1 := byte(0)
	for _, c := range b {
		if c == o1[c1] {
			// We only count a hit if there was two correct predictions in a row.
			if lastMatch {
				hits++
			}
			lastMatch = true
		} else {
			lastMatch = false
		}
		o1[c1] = c
		c1 = c
		hist[c]++
	}

	// Use x^0.6 to give better spread
	prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)

	// Calculate histogram distribution
	variance := float64(0)
	avg := float64(len(b)) / 256

	for _, v := range hist {
		Δ := float64(v) - avg
		variance += Δ * Δ
	}

	stddev := math.Sqrt(float64(variance)) / float64(len(b))
	exp := math.Sqrt(1 / float64(len(b)))

	// Subtract expected stddev
	stddev -= exp
	if stddev < 0 {
		stddev = 0
	}
	stddev *= 1 + exp

	// Use x^0.4 to give better spread
	entropy := math.Pow(stddev, 0.4)

	// 50/50 weight between prediction and histogram distribution
	return math.Pow((prediction+entropy)/2, 0.9)
}

// ShannonEntropyBits returns the number of bits minimum required to represent
// an entropy encoding of the input bytes.
// https://en.wiktionary.org/wiki/Shannon_entropy
func ShannonEntropyBits(b []byte) int {
	if len(b) == 0 {
		return 0
	}
	var hist [256]int
	for _, c := range b {
		hist[c]++
	}
	shannon := float64(0)
	invTotal := 1.0 / float64(len(b))
	for _, v := range hist[:] {
		if v > 0 {
			n := float64(v)
			shannon += math.Ceil(-math.Log2(n*invTotal) * n)
		}
	}
	return int(math.Ceil(shannon))
}

Version data entries

16 entries across 16 versions & 1 rubygems

Version Path
ruby_snowflake_client-1.3.7 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.6 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.5 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.4 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.4.pre.debug ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.3.pre.debug ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.2 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.1 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.3.0 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.2.1 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.2.0 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.1.1 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.1.0 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.0.2 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.0.1 ext/vendor/github.com/klauspost/compress/compressible.go
ruby_snowflake_client-1.0.0 ext/vendor/github.com/klauspost/compress/compressible.go