cmap.rb in pdf-reader-2.9.0

- old
+ new

@@ -1,7 +1,7 @@
 # coding: utf-8
-# typed: false
+# typed: true
 # frozen_string_literal: true
 
 ################################################################################
 #
 # Copyright (C) 2008 James Healy (jimmy@deefa.com)
@@ -33,68 +33,71 @@
   # extracting various useful information.
   #
   class CMap # :nodoc:
 
     CMAP_KEYWORDS = {
-      "begincodespacerange" => 1,
-      "endcodespacerange" => 1,
-      "beginbfchar" => 1,
-      "endbfchar" => 1,
-      "beginbfrange" => 1,
-      "endbfrange" => 1,
-      "begin" => 1,
-      "begincmap" => 1,
-      "def" => 1
+      "begincodespacerange" => :noop,
+      "endcodespacerange" => :noop,
+      "beginbfchar" => :noop,
+      "endbfchar" => :noop,
+      "beginbfrange" => :noop,
+      "endbfrange" => :noop,
+      "begin" => :noop,
+      "begincmap" => :noop,
+      "def" => :noop
     }
 
     attr_reader :map
 
     def initialize(data)
       @map = {}
       process_data(data)
     end
 
-    def process_data(data)
-      parser = build_parser(data)
-      mode = :none
-      instructions = []
-
-      while token = parser.parse_token(CMAP_KEYWORDS)
-        if token == "beginbfchar"
-          mode = :char
-        elsif token == "endbfchar"
-          process_bfchar_instructions(instructions)
-          instructions = []
-          mode = :none
-        elsif token == "beginbfrange"
-          mode = :range
-        elsif token == "endbfrange"
-          process_bfrange_instructions(instructions)
-          instructions = []
-          mode = :none
-        elsif mode == :char || mode == :range
-          instructions << token
-        end
-      end
-    end
-
     def size
       @map.size
     end
 
     # Convert a glyph code into one or more Codepoints.
     #
     # Returns an array of Integers.
     #
     def decode(c)
-      # TODO: implement the conversion
-      return c unless Integer === c
-      @map[c]
+      @map.fetch(c, [])
     end
 
     private
 
+    def process_data(data, initial_mode = :none)
+      parser = build_parser(data)
+      mode = initial_mode
+      instructions = []
+
+      while token = parser.parse_token(CMAP_KEYWORDS)
+        if token.is_a?(String) || token.is_a?(Array)
+          if token == "beginbfchar"
+            mode = :char
+          elsif token == "endbfchar"
+            process_bfchar_instructions(instructions)
+            instructions = []
+            mode = :none
+          elsif token == "beginbfrange"
+            mode = :range
+          elsif token == "endbfrange"
+            process_bfrange_instructions(instructions)
+            instructions = []
+            mode = :none
+          elsif mode == :char
+            instructions << token.to_s
+          elsif mode == :range
+            instructions << token
+          end
+        end
+      end
+    end
+
+
     def build_parser(instructions)
       buffer = Buffer.new(StringIO.new(instructions))
       Parser.new(buffer)
     end
 
@@ -105,69 +108,79 @@
     #
     # However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
     # exception when we try converting broken UTF-16 to UTF-8
     #
     def str_to_int(str)
-      return nil if str.nil? || str.size == 0
       unpacked_string = if str.bytesize == 1 # UTF-8
         str.unpack("C*")
       else # UTF-16
          str.unpack("n*")
       end
       result = []
       while unpacked_string.any? do
-        if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
+        if unpacked_string.size >= 2 &&
+            unpacked_string.first.to_i > 0xD800 &&
+            unpacked_string.first.to_i < 0xDBFF
           # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
           # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
           # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
-          points = [unpacked_string.shift, unpacked_string.shift]
-          result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
+          point_one = unpacked_string.shift.to_i
+          point_two = unpacked_string.shift.to_i
+          result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
         else
           result << unpacked_string.shift
         end
       end
       result
     end
 
     def process_bfchar_instructions(instructions)
       instructions.each_slice(2) do |one, two|
-        find    = str_to_int(one)
-        replace = str_to_int(two)
-        @map[find.first] = replace
+        find    = str_to_int(one.to_s)
+        replace = str_to_int(two.to_s)
+        if find.any? && replace.any?
+          @map[find.first.to_i] = replace
+        end
       end
     end
 
     def process_bfrange_instructions(instructions)
       instructions.each_slice(3) do |start, finish, to|
         if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
           bfrange_type_one(start, finish, to)
         elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
           bfrange_type_two(start, finish, to)
         else
-          raise "invalid bfrange section"
+          raise MalformedPDFError, "invalid bfrange section"
         end
       end
     end
 
     def bfrange_type_one(start_code, end_code, dst)
-      start_code = str_to_int(start_code)[0]
-      end_code   = str_to_int(end_code)[0]
+      start_code = str_to_int(start_code).first
+      end_code   = str_to_int(end_code).first
       dst        = str_to_int(dst)
 
+      return if start_code.nil? || end_code.nil?
+
       # add all values in the range to our mapping
       (start_code..end_code).each_with_index do |val, idx|
-        @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
+        @map[val] = dst.length == 1 ? [dst[0].to_i + idx] : [dst[0].to_i, dst[1].to_i + 1]
       end
     end
 
     def bfrange_type_two(start_code, end_code, dst)
-      start_code = str_to_int(start_code)[0]
-      end_code   = str_to_int(end_code)[0]
+      start_code = str_to_int(start_code).first
+      end_code   = str_to_int(end_code).first
+
+      return if start_code.nil? || end_code.nil?
+
       from_range = (start_code..end_code)
 
       # add all values in the range to our mapping
       from_range.each_with_index do |val, idx|
-        @map[val] = str_to_int(dst[idx])
+        dst_char = dst[idx]
+        @map[val.to_i] = str_to_int(dst_char) if dst_char
       end
     end
   end
 end