# -*- encoding: utf-8; frozen_string_literal: true -*- # #-- # This file is part of HexaPDF. # # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby # Copyright (C) 2014-2019 Thomas Leitner # # HexaPDF is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License version 3 as # published by the Free Software Foundation with the addition of the # following permission added to Section 15 as permitted in Section 7(a): # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON # INFRINGEMENT OF THIRD PARTY RIGHTS. # # HexaPDF is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public # License for more details. # # You should have received a copy of the GNU Affero General Public License # along with HexaPDF. If not, see . # # The interactive user interfaces in modified source and object code # versions of HexaPDF must display Appropriate Legal Notices, as required # under Section 5 of the GNU Affero General Public License version 3. # # In accordance with Section 7(b) of the GNU Affero General Public # License, a covered work must retain the producer line in every PDF that # is created or manipulated using HexaPDF. # # If the GNU Affero General Public License doesn't fit your need, # commercial licenses are available at . #++ require 'fiber' require 'hexapdf/utils/bit_stream' require 'hexapdf/filter/predictor' require 'hexapdf/error' module HexaPDF module Filter # Implements the LZW filter. # # Since LZW uses a tightly packed bit stream in which codes are of varying bit lengths and are # not aligned to byte boundaries, this filter is not as fast as the other filters. If speed is # a concern, the FlateDecode filter should be used instead. # # See: HexaPDF::Filter, PDF1.7 s7.4.4 module LZWDecode CLEAR_TABLE = 256 # :nodoc: EOD = 257 # :nodoc: INITIAL_ENCODER_TABLE = {} #:nodoc: 0.upto(255) {|i| INITIAL_ENCODER_TABLE[i.chr.freeze] = i } INITIAL_ENCODER_TABLE[CLEAR_TABLE] = CLEAR_TABLE INITIAL_ENCODER_TABLE[EOD] = EOD INITIAL_DECODER_TABLE = {} #:nodoc: 0.upto(255) {|i| INITIAL_DECODER_TABLE[i] = i.chr } INITIAL_DECODER_TABLE[CLEAR_TABLE] = CLEAR_TABLE INITIAL_DECODER_TABLE[EOD] = EOD # See HexaPDF::Filter def self.decoder(source, options = nil) fib = Fiber.new do # initialize decoder state code_length = 9 table = INITIAL_DECODER_TABLE.dup stream = HexaPDF::Utils::BitStreamReader.new result = ''.b finished = false last_code = CLEAR_TABLE while !finished && source.alive? && (data = source.resume) stream.append_data(data) while (code = stream.read(code_length)) # Decoder is one step behind => subtract 1! # We check the table size before entering the next code into it => subtract 1, but # there is one exception: After table entry 4095 is written, the clear table code # also gets written with code length 12, case table.size when 510, 1022, 2046 code_length += 1 when 4096 if code != CLEAR_TABLE raise FilterError, "Maximum of 12bit for codes in LZW stream exceeded" end end if code == EOD finished = true break elsif code == CLEAR_TABLE # reset decoder state code_length = 9 table = INITIAL_DECODER_TABLE.dup elsif last_code == CLEAR_TABLE unless table.key?(code) raise FilterError, "Unknown code in LZW encoded stream found" end result << table[code] else unless table.key?(last_code) raise FilterError, "Unknown code in LZW encoded stream found" end last_str = table[last_code] str = if table.key?(code) table[code] else last_str + last_str[0] end result << str table[table.size] = last_str + str[0] end last_code = code end Fiber.yield(result) result = ''.b end end if options && options[:Predictor] Predictor.decoder(fib, options) else fib end end # See HexaPDF::Filter def self.encoder(source, options = nil) if options && options[:Predictor] source = Predictor.encoder(source, options) end Fiber.new do # initialize encoder state code_length = 9 table = INITIAL_ENCODER_TABLE.dup # initialize the bit stream with the clear-table marker stream = HexaPDF::Utils::BitStreamWriter.new result = stream.write(CLEAR_TABLE, 9) str = ''.b while source.alive? && (data = source.resume) data.each_char do |char| newstr = str + char if table.key?(newstr) str = newstr else result << stream.write(table[str], code_length) table[newstr.freeze] = table.size str = char end case table.size when 512 then code_length = 10 when 1024 then code_length = 11 when 2048 then code_length = 12 when 4096 result << stream.write(CLEAR_TABLE, code_length) # reset encoder state code_length = 9 table = INITIAL_ENCODER_TABLE.dup end end Fiber.yield(result) result = ''.b end result = stream.write(table[str], code_length) result << stream.write(EOD, code_length) result << stream.finalize result end end end end end