Sha256: 9fc34146995ecb27ab754664624cb1c5afa3b3a12009eb3cd7af24b93668c027

Contents?: true

Size: 1.85 KB

Versions: 3

Compression:

Stored size: 1.85 KB

Contents

#!/usr/bin/env ruby
# LZW -- rpdf2txt -- 09.07.2008 -- hwyss@ywesee.com

module Rpdf2txt
  # Decoder for LZW-compressed byte strings using variable-width codes that
  # start at 9 bits and grow up to 12 bits, with the two reserved codes
  # CLEAR (256, reset the dictionary) and EOD (257, end of data).
  #
  #   Rpdf2txt::LZW.decode(compressed)      # => decoded String
  #   Rpdf2txt::LZW.decode(compressed, 0)   # same, without "early change"
  class LZW
    # Code that resets the dictionary and the code width.
    CLEAR = 256
    # Code that marks the end of the compressed data.
    EOD = 257

    # Convenience wrapper: builds a fresh decoder and decodes +data+.
    #
    # data::         String of LZW-compressed bytes.
    # early_change:: Integer subtracted from each power-of-two dictionary
    #                size to obtain the point at which the code width grows
    #                (1 grows the width one entry early, 0 does not).
    #
    # Returns the decoded String.
    def self.decode data, early_change=1
      new(early_change).decode(data)
    end

    # early_change:: see LZW.decode.
    def initialize(early_change=1)
      @early_change = early_change
      # Pristine table: one single-byte string per byte value, followed by
      # placeholders that occupy the CLEAR and EOD slots so that the first
      # free index is 258.
      @__dict = (0..255).collect { |num| num.chr }.push :clear, :eod
      init_dictionary
    end

    # Resets the working dictionary to the pristine 258-entry table and the
    # code width back to 9 bits.
    def init_dictionary
      @dictionary = @__dict.dup
      @code_length = 9
      @boundary = 512 - @early_change
    end

    # Decodes +data+ (a String of compressed bytes) and returns the decoded
    # String. Decoding stops at the EOD code or as soon as fewer bits than
    # one full code remain, so trailing padding and truncated streams never
    # contribute output.
    #
    # Raises RuntimeError ('Bad compressed code: N') when a code refers to a
    # dictionary entry that cannot exist yet.
    def decode data
      bits, = data.unpack('B*')
      result = ''.dup
      # nil means "no previous code": true at the start of the stream and
      # right after every CLEAR. In that state a code is only emitted and no
      # dictionary entry is derived from it.
      old_code = nil
      while (code = get_next_code(bits)) && code != EOD
        if code == CLEAR
          init_dictionary
          old_code = nil
          next
        end
        string = @dictionary[code]
        if old_code.nil?
          raise 'Bad compressed code: %s' % code unless string
        elsif string
          update_dictionary @dictionary[old_code] + string[0, 1]
        elsif code == @dictionary.size
          # The code refers to the entry that is about to be created: it
          # must be the previous string followed by its own first byte.
          previous = @dictionary[old_code]
          string = update_dictionary(previous + previous[0, 1])
        else
          raise 'Bad compressed code: %s' % code
        end
        result << string
        old_code = code
      end
      result
    end

    # Removes the next code (@code_length bits) from the front of +bits+, a
    # String of '0'/'1' characters, and returns it as an Integer. Returns
    # nil, leaving +bits+ untouched, when fewer than @code_length bits are
    # left.
    def get_next_code bits
      return nil if bits.size < @code_length
      bits.slice!(0, @code_length).to_i(2)
    end

    # Appends +str+ to the dictionary and widens the code length by one bit
    # (up to 12) once the dictionary size reaches the current boundary.
    # Returns +str+.
    def update_dictionary(str)
      @dictionary.push str
      if @dictionary.size >= @boundary && @code_length < 12
        @code_length += 1
        @boundary = (2**@code_length - @early_change)
      end
      str
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
rpdf2txt-0.8.4 lib/rpdf2txt/lzw.rb
rpdf2txt-0.8.3 lib/rpdf2txt/lzw.rb
rpdf2txt-0.8.2 lib/rpdf2txt/lzw.rb