# # This code is under public domain (CC0) # . # # To the extent possible under law, dearblue has waived all copyright # and related or neighboring rights to this work. # # dearblue # require_relative "../extlz4" require "stringio" require "rubygems" gem "xxhash", "~> 0.3" require "xxhash" module LZ4 def self.encode_old(first, *args) case args.size when 0 level = nil opts = StreamEncoder::OPTIONS when 1 level = args[0] if level.respond_to?(:to_hash) opts = StreamEncoder::OPTIONS.merge(level) level = nil else level = level.to_i opts = StreamEncoder::OPTIONS end when 2 level = args[0].to_i opts = StreamEncoder::OPTIONS.merge(args[1]) else raise ArgumentError, "wrong number of arguments (#{args.size + 1} for 1 .. 3)" end left = opts.keys - StreamEncoder::OPTIONS.keys unless left.empty? if left.size > 10 raise ArgumentError, "unknown key - #{left[0]} (for #{StreamEncoder::OPTIONS.keys.slice(0, 10).join(", ")} and more...)" else raise ArgumentError, "unknown key - #{left[0]} (for #{StreamEncoder::OPTIONS.keys.join(", ")})" end end if first.kind_of?(String) src = first dest = StringIO.new("".b) else src = nil dest = first end lz4 = StreamEncoder.new(dest, level || 1, opts[:blocksize], opts[:block_dependency], opts[:block_checksum], opts[:stream_checksum]) case when src lz4 << src lz4.close dest.string when block_given? begin yield(lz4) ensure lz4.close end else lz4 end end def self.decode_old(io, &block) if io.kind_of?(String) lz4 = StreamDecoder.new(StringIO.new(io)) dest = lz4.read lz4.close return dest end dec = StreamDecoder.new(io) return dec unless block_given? begin yield(dec) ensure dec.close end end module BasicStream MAGIC_NUMBER = 0x184D2204 MAGIC_NUMBER_LEGACY = 0x184C2102 BLOCK_MAXIMUM_SIZES = { # 0 => not available # 1 => not available # 2 => not available # 3 => not available 4 => 1 << 16, # 64 KiB 5 => 1 << 18, # 256 KiB 6 => 1 << 20, # 1 MiB 7 => 1 << 22, # 4 MiB } LITERAL_DATA_BLOCK_FLAG = 0x80000000 VERSION_NUMBER = 1 << 6 VERSION_NUMBER_MASK = 0x03 << 6 BLOCK_INDEPENDENCY = 1 << 5 BLOCK_CHECKSUM = 1 << 4 STREAM_SIZE = 1 << 3 STREAM_CHECKSUM = 1 << 2 PRESET_DICTIONARY = 1 << 0 Header = Struct.new(:magic, :version, :blockindependence, :blockchecksum, :streamchecksum, :blocksize, :streamsize, :predictid) class Header def self.load(io) case magic = io.read(4).unpack("V") when MAGIC_NUMBER_LEGACY new(magic, -1, true, false, false, 8 * 1024 * 1024, nil, nil) when MAGIC_NUMBER (sf, bd) = io.read(2).unpack("CC") version = (sf >> 6) & 0x03 raise "stream header error - wrong version number" unless version == 0x01 blockindependence = ((sf >> 5) & 0x01) == 0 ? false : true blockchecksum = ((sf >> 4) & 0x01) == 0 ? false : true streamsize = ((sf >> 3) & 0x01) == 0 ? false : true streamchecksum = ((sf >> 2) & 0x01) == 0 ? false : true # reserved = (sf >> 1) & 0x01 predictid = ((sf >> 0) & 0x01) == 0 ? false : true # reserved = (bd >> 7) & 0x01 blockmax = (bd >> 4) & 0x07 # reserved = (bd >> 0) & 0x0f blocksize = BLOCK_MAXIMUM_SIZES[blockmax] raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless blocksize streamsize = io.read(8).unpack("Q<")[0] if streamsize predictid = io.read(4).unpack("V")[0] if predictid headerchecksum = io.getbyte new(magic, version, blockindependence, blockchecksum, streamchecksum, blocksize, streamsize, predictid) else raise "could not recognized magic number (0x%08x)" % (magic || nil) end end def self.pack(*args) new(*args).pack end def pack raise "wrong magic number" unless magic == MAGIC_NUMBER raise "wrong version number" unless version == VERSION_NUMBER header = [magic].pack("V") sd = version | (blockindependence ? BLOCK_INDEPENDENCY : 0) | (blockchecksum ? BLOCK_CHECKSUM : 0) | (streamsize ? STREAM_SIZE : 0) | (streamchecksum ? STREAM_CHECKSUM : 0) | (predictid ? PRESET_DICTIONARY : 0) bd = (BLOCK_MAXIMUM_SIZES.rassoc(blocksize)[0] << 4) desc = [sd, bd].pack("CC") header << desc header << [streamsize].pack("Q<") if streamsize header << [predictid].pack("V") if predictid header << [XXhash.xxh32(desc) >> 8].pack("C") end end BlockHeader = Struct.new(:iscompress, :packedsize) class BlockHeader alias compress? iscompress undef iscompress undef iscompress= undef packedsize= def pack [(compress? ? 0 : LITERAL_DATA_BLOCK_FLAG) | packedsize].pack("V") end def self.pack(iscompress, packedsize) new(iscompress, packedsize).pack end def self.unpack(data) d = data.unpack("V")[0] new((d & LITERAL_DATA_BLOCK_FLAG) == 0 ? true : false, packedsize & ~LITERAL_DATA_BLOCK_FLAG) end def self.load(io) unpack io.read(4) end end end # # LZ4 stream encoder # class StreamEncoder include BasicStream OPTIONS = { legacy: false, blocksize: 7, block_dependency: false, block_checksum: false, stream_checksum: true, } def initialize(io, level, blocksize, block_dependency, block_checksum, stream_checksum) @block_checksum = !!block_checksum @stream_checksum = XXhash::XXhashInternal::StreamingHash32.new(0) if stream_checksum @blocksize = BLOCK_MAXIMUM_SIZES[blocksize] raise ArgumentError, "wrong blocksize (#{blocksize})" unless @blocksize @block_dependency = !!block_dependency level = level ? level.to_i : nil case when level.nil? || level < 4 level = nil when level > 16 level = 16 end @encoder = get_encoder(level, @block_dependency) @io = io @buf = "".force_encoding(Encoding::BINARY) header = [MAGIC_NUMBER].pack("V") sd = VERSION_NUMBER | (@block_dependency ? 0 : BLOCK_INDEPENDENCY) | (@block_checksum ? BLOCK_CHECKSUM : 0) | (false ? STREAM_SIZE : 0) | (@stream_checksum ? STREAM_CHECKSUM : 0) | (false ? PRESET_DICTIONARY : 0) bd = (blocksize << 4) desc = [sd, bd].pack("CC") header << desc # TODO: header << [stream_size].pack("Q<") if stream_size # TODO: header << [XXhash.xxh32(predict)].pack("V") if predict # preset dictionary header << [XXhash.xxh32(desc) >> 8].pack("C") @io << header end # # call-seq: # write(data) -> nil or self # # Write data to lz4 stream. # # If data is nil, return to process nothing. # # [RETURN (self)] # Success write process. # # [RETURN (nil)] # Given nil to data. # # [data (String)] # def write(data) return nil if data.nil? @slicebuf ||= "" @inputproxy ||= StringIO.new @inputproxy.string = String(data) until @inputproxy.eof? slicesize = @blocksize - @buf.bytesize slicesize = @blocksize if slicesize > @blocksize @buf << @inputproxy.read(slicesize, @slicebuf) export_block if @buf.bytesize >= @blocksize end self end # # Same as `write` method, but return self always. # def <<(data) write data self end def close export_block unless @buf.empty? @io << [0].pack("V") @io << [@stream_checksum.digest].pack("V") if @stream_checksum @io.flush if @io.respond_to?(:flush) @io = nil end private def get_encoder(level, block_dependency) workencbuf = "".force_encoding(Encoding::BINARY) if block_dependency streamencoder = LZ4::BlockEncoder.new(level) ->(src) { streamencoder.update(src, workencbuf) } else ->(src) { LZ4.block_encode(level, src, workencbuf) } end end private def export_block w = @encoder.(@buf) @stream_checksum.update(@buf) if @stream_checksum if w.bytesize < @buf.bytesize # 上限を超えずに圧縮できた @io << [w.bytesize].pack("V") << w else # 圧縮後は上限を超過したため、無圧縮データを出力する @io << [@buf.bytesize | LITERAL_DATA_BLOCK_FLAG].pack("V") << @buf w = @buf end if @block_checksum @io << [XXhash.xxh32(w)].pack("V") end @buf.clear end end # # LZ4 ストリームを伸張するためのクラスです。 # class StreamDecoder include BasicStream attr_reader :version attr_reader :blockindependence attr_reader :blockchecksum attr_reader :streamchecksum attr_reader :blockmaximum attr_reader :streamsize attr_reader :presetdict def initialize(io) magic = io.read(4).unpack("V")[0] case magic when MAGIC_NUMBER sf = io.getbyte @version = (sf >> 6) & 0x03 raise "stream header error - wrong version number" unless @version == 0x01 @blockindependence = ((sf >> 5) & 0x01) == 0 ? false : true @blockchecksum = ((sf >> 4) & 0x01) == 0 ? false : true streamsize = ((sf >> 3) & 0x01) == 0 ? false : true @streamchecksum = ((sf >> 2) & 0x01) == 0 ? false : true # reserved = (sf >> 1) & 0x01 presetdict = ((sf >> 0) & 0x01) == 0 ? false : true bd = io.getbyte # reserved = (bd >> 7) & 0x01 blockmax = (bd >> 4) & 0x07 # reserved = (bd >> 0) & 0x0f @blockmaximum = BLOCK_MAXIMUM_SIZES[blockmax] raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless @blockmaximum @streamsize = io.read(8).unpack("Q<")[0] if streamsize @presetdict = io.read(4).unpack("V")[0] if presetdict headerchecksum = io.getbyte if @blockindependence @decoder = LZ4.method(:block_decode) else @decoder = LZ4::BlockDecoder.new.method(:update) end when MAGIC_NUMBER_LEGACY @version = -1 @blockindependence = true @blockchecksum = false @streamchecksum = false @blockmaximum = 1 << 23 # 8 MiB @streamsize = nil @presetdict = nil @decoder = LZ4.method(:block_decode) else raise Error, "stream header error - wrong magic number" end @io = io @pos = 0 @readbuf = "".b @decodebuf = "".b end def close @io = nil end # # call-seq: # read -> string or nil # read(size) -> string or nil # read(size, dest) -> string or nil # def read(*args) case args.size when 0 read_all when 1 read_part(args[0].to_i, "") when 2 read_part(args[0].to_i, args[1]) else raise ArgumentError, "wrong number of arguments (#{args.size} for 0 .. 2)" end end def getbyte w = read(1) or return nil w.getbyte(0) end def eof !@pos end alias eof? eof def tell raise NotImplementedError end def seek(off, cur) raise NotImplementedError end def pos raise NotImplementedError end def pos=(pos) raise NotImplementedError end private def read_all if @buf dest = @buf.read else dest = "" end @buf = nil w = nil dest << w while w = getnextblock @pos = nil dest end private def read_part(size, dest) dest.clear return dest unless size > 0 return nil unless @pos @slicebuf ||= "" begin unless @buf && !@buf.eof? unless w = getnextblock @pos = nil if dest.empty? return nil else return dest end end # NOTE: StringIO を用いている理由について # ruby-2.1 で String#slice 系を使って新しい文字列を生成すると、ヒープ領域の確保量が㌧でもない状況になる。 # StringIO#read に読み込みバッファを与えることで、この問題を軽減している。 @buf ||= StringIO.new @buf.string = w end dest << @buf.read(size, @slicebuf) size -= @slicebuf.bytesize end while size > 0 dest end private def getnextblock return nil if @version == -1 && @io.eof? flags = @io.read(4).unpack("V")[0] iscomp = (flags >> 31) == 0 ? true : false blocksize = flags & 0x7fffffff return nil unless blocksize > 0 unless blocksize <= @blockmaximum raise LZ4::Error, "block size is too big (blocksize is #{blocksize}, but blockmaximum is #{@blockmaximum}. may have damaged)." end w = @io.read(blocksize, @readbuf) unless w.bytesize == blocksize raise LZ4::Error, "can not read block (readsize=#{w.bytesize}, needsize=#{blocksize} (#{"0x%x" % blocksize}))" end w = @decoder.(w, @blockmaximum, @decodebuf) if iscomp @io.read(4) if @blockchecksum # TODO: IMPLEMENT ME! compare checksum w end end end