lib/pdf/reader/cmap.rb in pdf-reader-0.8.3 vs lib/pdf/reader/cmap.rb in pdf-reader-0.8.4

- old
+ new

@@ -7,14 +7,14 @@ # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION @@ -28,55 +28,110 @@ def initialize(data) @map = {} in_char_mode = false in_range_mode = false + instructions = "" data.each_line do |l| if l.include?("beginbfchar") - in_char_mode = true + in_char_mode = true elsif l.include?("endbfchar") - in_char_mode = false + process_bfchar_instructions(instructions) + instructions = "" + in_char_mode = false elsif l.include?("beginbfrange") - in_range_mode = true + in_range_mode = true elsif l.include?("endbfrange") - in_range_mode = false + process_bfrange_instructions(instructions) + instructions = "" + in_range_mode = false end - if in_char_mode - process_bfchar_line(l) - elsif in_range_mode - process_bfrange_line(l) + if !l.include?("begin") && (in_char_mode || in_range_mode) + instructions << l end end end + def size + @map.size + end + def decode(c) # TODO: implement the conversion return c unless c.class == Fixnum @map[c] end private - def process_bfchar_line(l) - m, find, replace = *l.match(/<([0-9a-fA-F]+)>\s*<([0-9a-fA-F]+)>/) - @map["0x#{find}".hex] = "0x#{replace}".hex if find && replace + def build_parser(instructions) + buffer = Buffer.new(StringIO.new(instructions)) + Parser.new(buffer) end - def process_bfrange_line(l) - m, start_code, end_code, dst = *l.match(/<([0-9a-fA-F]+)>\s*<([0-9a-fA-F]+)>\s*<([0-9a-fA-F]+)>/) - if start_code && end_code && dst - start_code = "0x#{start_code}".hex - end_code = "0x#{end_code}".hex - dst = "0x#{dst}".hex + def str_to_int(str) + return nil if str.nil? || str.size == 0 || str.size >= 3 - # add all values in the range to our mapping - (start_code..end_code).each_with_index do |val, idx| - @map[val] = dst + idx - # ensure a single range does not exceed 255 chars - raise PDF::Reader::MalformedPDFError, "a CMap bfrange cann't exceed 255 chars" if idx > 255 + if str.size == 1 + str.unpack("C*")[0] + else + str.unpack("n*")[0] + end + end + + def process_bfchar_instructions(instructions) + parser = build_parser(instructions) + find = str_to_int(parser.parse_token) + replace = str_to_int(parser.parse_token) + while find && replace + @map[find] = replace + find = str_to_int(parser.parse_token) + replace = str_to_int(parser.parse_token) + end + end + + def process_bfrange_instructions(instructions) + parser = build_parser(instructions) + start = parser.parse_token + finish = parser.parse_token + to = parser.parse_token + while start && finish && to + if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String) + bfrange_type_one(start, finish, to) + elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array) + bfrange_type_two(start, finish, to) + else + raise "invalid bfrange section" end + start = parser.parse_token + finish = parser.parse_token + to = parser.parse_token + end + end + + def bfrange_type_one(start_code, end_code, dst) + start_code = str_to_int(start_code) + end_code = str_to_int(end_code) + dst = str_to_int(dst) + + # add all values in the range to our mapping + (start_code..end_code).each_with_index do |val, idx| + @map[val] = dst + idx + # ensure a single range does not exceed 255 chars + raise PDF::Reader::MalformedPDFError, "a CMap bfrange cann't exceed 255 chars" if idx > 255 + end + end + + def bfrange_type_two(start_code, end_code, dst) + start_code = str_to_int(start_code) + end_code = str_to_int(end_code) + from_range = (start_code..end_code) + + # add all values in the range to our mapping + from_range.each_with_index do |val, idx| + @map[val] = str_to_int(dst[idx]) end end end end