lib/pdf/reader/cmap.rb in pdf-reader-1.4.1 vs lib/pdf/reader/cmap.rb in pdf-reader-2.0.0.beta1
- old
+ new
@@ -29,37 +29,49 @@
# wraps a string containing a PDF CMap and provides convenience methods for
# extracting various useful information.
#
class CMap # :nodoc:
# Table of CMap operator names, added in the new revision. process_data passes
# it to parser.parse_token on every call. Every value is 1; how parse_token
# uses the table is not visible in this excerpt - presumably it only checks
# key membership so these words are returned as keyword tokens (TODO confirm).
+ CMAP_KEYWORDS = {
+ "begincodespacerange" => 1,
+ "endcodespacerange" => 1,
+ "beginbfchar" => 1,
+ "endbfchar" => 1,
+ "beginbfrange" => 1,
+ "endbfrange" => 1,
+ "begin" => 1,
+ "begincmap" => 1,
+ "def" => 1
+ }
# Read-only access to @map, the Hash that initialize creates empty and that
# process_bfchar_instructions fills with code => replacement entries.
attr_reader :map
# Builds the mapping for one CMap: starts with an empty @map, then hands data
# to process_data to populate it. Per the class comment, data is a string
# containing a PDF CMap.
def initialize(data)
@map = {}
process_data(data)
end
# Scans the CMap in data, collects whatever lies between a beginbfchar/endbfchar
# or beginbfrange/endbfrange pair, and hands each collected section to
# process_bfchar_instructions or process_bfrange_instructions respectively.
#
# Old revision: walked data line by line, detected the section markers with a
# substring test (include?), and accumulated the raw lines into one String.
# New revision: builds a parser over data once, pulls tokens from it until
# parse_token returns a falsy value, detects the markers by exact token
# equality, and accumulates the tokens themselves in an Array.
def process_data(data)
+ parser = build_parser(data)
# mode is nil outside a section, :char inside a bfchar section and :range
# inside a bfrange section.
mode = nil
- instructions = ""
+ instructions = []
- data.each_line do |l|
- if l.include?("beginbfchar")
+ while token = parser.parse_token(CMAP_KEYWORDS)
+ if token == "beginbfchar"
mode = :char
- elsif l.include?("endbfchar")
+ elsif token == "endbfchar"
# Section finished: process what was collected, then reset the buffer.
process_bfchar_instructions(instructions)
- instructions = ""
+ instructions = []
mode = nil
- elsif l.include?("beginbfrange")
+ elsif token == "beginbfrange"
mode = :range
- elsif l.include?("endbfrange")
+ elsif token == "endbfrange"
# Section finished: process what was collected, then reset the buffer.
process_bfrange_instructions(instructions)
- instructions = ""
+ instructions = []
mode = nil
# Anything seen while inside a section is collected; input outside a
# section matches no branch and is ignored.
elsif mode == :char || mode == :range
- instructions << l
+ instructions << token
end
end
end
def size
@@ -103,35 +115,25 @@
unpacked_string
end
end
# Records the mappings of one bfchar section in @map. The section is consumed
# two values at a time: both are converted with str_to_int, and the first
# element of the converted source becomes the key for the converted replacement.
#
# Old revision: instructions was a String; a parser was built over it and
# pairs were read until either converted value was falsy.
# New revision: instructions is the Array of tokens collected by process_data
# and is walked with each_slice(2).
#
# NOTE(review): the new revision drops the old `while find && replace` guard.
# With an odd number of tokens `two` is nil, and `find.first` is called
# without a nil check. The old loop relied on str_to_int returning a falsy
# value to terminate, but its handling of nil/empty input is not visible in
# this excerpt - confirm before relying on malformed CMaps being tolerated.
def process_bfchar_instructions(instructions)
- parser = build_parser(instructions)
- find = str_to_int(parser.parse_token)
- replace = str_to_int(parser.parse_token)
- while find && replace
- @map[find[0]] = replace
- find = str_to_int(parser.parse_token)
- replace = str_to_int(parser.parse_token)
+ instructions.each_slice(2) do |one, two|
+ find = str_to_int(one)
+ replace = str_to_int(two)
+ @map[find.first] = replace
end
end
# Handles one bfrange section by consuming it three values at a time
# (start, finish, to) and dispatching on the type of `to`:
#   String, String, String -> bfrange_type_one
#   String, String, Array  -> bfrange_type_two
# Any other combination raises a RuntimeError ("invalid bfrange section").
#
# Old revision: instructions was a String; a parser was built over it and
# triples were read until any of the three tokens was falsy.
# New revision: instructions is the Array of tokens collected by process_data
# and is walked with each_slice(3).
#
# NOTE(review): with each_slice a trailing group of fewer than three tokens
# leaves finish and/or to nil, which falls through to the raise below; the
# old loop simply stopped on such a group. Confirm this stricter behaviour is
# intended.
def process_bfrange_instructions(instructions)
- parser = build_parser(instructions)
- start = parser.parse_token
- finish = parser.parse_token
- to = parser.parse_token
- while start && finish && to
+ instructions.each_slice(3) do |start, finish, to|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
bfrange_type_one(start, finish, to)
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
bfrange_type_two(start, finish, to)
else
raise "invalid bfrange section"
end
- start = parser.parse_token
- finish = parser.parse_token
- to = parser.parse_token
end
end
def bfrange_type_one(start_code, end_code, dst)
start_code = str_to_int(start_code)[0]