lib/combine_pdf/parser.rb in combine_pdf-1.0.26 vs lib/combine_pdf/parser.rb in combine_pdf-1.0.27
- old
+ new
@@ -260,11 +260,11 @@
##########################################
## parse a Literal String
##########################################
elsif @scanner.scan(/\(/)
# warn "Found a literal string"
- str = ''.force_encoding(Encoding::ASCII_8BIT)
+ str = ''.b
count = 1
while count > 0 && @scanner.rest?
scn = @scanner.scan_until(/[\(\)]/)
unless scn
warn "Unknown error parsing string at #{@scanner.pos} for string: #{str}!"
@@ -367,21 +367,21 @@
end
# the following was discarded because some PDF files didn't have an EOL marker as required
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
# instead, a non-strict RegExp is used:
-
+
# raise error if the stream doesn't end.
unless @scanner.skip_until(/endstream/)
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
end
length = @scanner.pos - (old_pos + 9)
length = 0 if(length < 0)
length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
- str = (length > 0) ? @scanner.string.slice(old_pos, length) : ''
+ str = (length > 0) ? @scanner.string.slice(old_pos, length) : +''
# warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
# need to remove end of stream
if out.last.is_a? Hash
@@ -630,21 +630,21 @@
#
# should be moved to the parser's workflow.
#
def serialize_objects_and_references
obj_dir = {}
- objid_cache = {}
+ objid_cache = {}.compare_by_identity
# create a dictionary for referenced objects (no value resolution at this point)
# at the same time, delete duplicates and old versions when objects have multiple versions
@parsed.uniq!
@parsed.length.times do |i|
o = @parsed[i]
- objid_cache[o.object_id] = i
+ objid_cache[o] = i
tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]]
if tmp_found = obj_dir[tmp_key]
tmp_found.clear
- @parsed[objid_cache[tmp_found.object_id]] = nil
+ @parsed[objid_cache[tmp_found]] = nil
end
obj_dir[tmp_key] = o
end
@parsed.compact!
objid_cache.clear
@@ -763,21 +763,21 @@
# new_data
# end
# end
# # run block of code on every PDF object (PDF objects are class Hash)
- # def each_object(object, limit_references = true, already_visited = {}, &block)
+ # def each_object(object, limit_references = true, already_visited = {}.compare_by_identity, &block)
# unless limit_references
- # already_visited[object.object_id] = true
+ # already_visited[object] = true
# end
# case
# when object.is_a?(Array)
# object.each {|obj| each_object(obj, limit_references, already_visited, &block)}
# when object.is_a?(Hash)
# yield(object)
# unless limit_references && object[:is_reference_only]
# object.each do |k,v|
- # each_object(v, limit_references, already_visited, &block) unless already_visited[v.object_id]
+ # each_object(v, limit_references, already_visited, &block) unless already_visited[v]
# end
# end
# end
# end
end