lib/pdf/reader/cmap.rb in pdf-reader-1.4.1 vs lib/pdf/reader/cmap.rb in pdf-reader-2.0.0.beta1

- old
+ new

@@ -29,37 +29,49 @@
   # wraps a string containing a PDF CMap and provides convenience methods for
   # extracting various useful information.
   #
   class CMap # :nodoc:
+    CMAP_KEYWORDS = {
+      "begincodespacerange" => 1,
+      "endcodespacerange" => 1,
+      "beginbfchar" => 1,
+      "endbfchar" => 1,
+      "beginbfrange" => 1,
+      "endbfrange" => 1,
+      "begin" => 1,
+      "begincmap" => 1,
+      "def" => 1
+    }
     attr_reader :map
     def initialize(data)
       @map = {}
       process_data(data)
     end
     def process_data(data)
+      parser = build_parser(data)
       mode = nil
-      instructions = ""
+      instructions = []
-      data.each_line do |l|
-        if l.include?("beginbfchar")
+      while token = parser.parse_token(CMAP_KEYWORDS)
+        if token == "beginbfchar"
           mode = :char
-        elsif l.include?("endbfchar")
+        elsif token == "endbfchar"
           process_bfchar_instructions(instructions)
-          instructions = ""
+          instructions = []
           mode = nil
-        elsif l.include?("beginbfrange")
+        elsif token == "beginbfrange"
           mode = :range
-        elsif l.include?("endbfrange")
+        elsif token == "endbfrange"
           process_bfrange_instructions(instructions)
-          instructions = ""
+          instructions = []
           mode = nil
         elsif mode == :char || mode == :range
-          instructions << l
+          instructions << token
         end
       end
     end
     def size
@@ -103,35 +115,25 @@
         unpacked_string
       end
     end
     def process_bfchar_instructions(instructions)
-      parser = build_parser(instructions)
-      find = str_to_int(parser.parse_token)
-      replace = str_to_int(parser.parse_token)
-      while find && replace
-        @map[find[0]] = replace
-        find = str_to_int(parser.parse_token)
-        replace = str_to_int(parser.parse_token)
+      instructions.each_slice(2) do |one, two|
+        find = str_to_int(one)
+        replace = str_to_int(two)
+        @map[find.first] = replace
       end
     end
     def process_bfrange_instructions(instructions)
-      parser = build_parser(instructions)
-      start = parser.parse_token
-      finish = parser.parse_token
-      to = parser.parse_token
-      while start && finish && to
+      instructions.each_slice(3) do |start, finish, to|
         if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
           bfrange_type_one(start, finish, to)
         elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
           bfrange_type_two(start, finish, to)
         else
           raise "invalid bfrange section"
         end
-        start = parser.parse_token
-        finish = parser.parse_token
-        to = parser.parse_token
       end
     end
     def bfrange_type_one(start_code, end_code, dst)
       start_code = str_to_int(start_code)[0]