lib/combine_pdf/parser.rb in combine_pdf-1.0.26 vs lib/combine_pdf/parser.rb in combine_pdf-1.0.27

- old
+ new

@@ -260,11 +260,11 @@ ########################################## ## parse a Literal String ########################################## elsif @scanner.scan(/\(/) # warn "Found a literal string" - str = ''.force_encoding(Encoding::ASCII_8BIT) + str = ''.b count = 1 while count > 0 && @scanner.rest? scn = @scanner.scan_until(/[\(\)]/) unless scn warn "Unknown error parsing string at #{@scanner.pos} for string: #{str}!" @@ -367,21 +367,21 @@ end # the following was discarded because some PDF files didn't have an EOL marker as required # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/) # instead, a non-strict RegExp is used: - + # raise error if the stream doesn't end. unless @scanner.skip_until(/endstream/) raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" end length = @scanner.pos - (old_pos + 9) length = 0 if(length < 0) length -= 1 if(@scanner.string[old_pos + length - 1] == "\n") length -= 1 if(@scanner.string[old_pos + length - 1] == "\r") - str = (length > 0) ? @scanner.string.slice(old_pos, length) : '' + str = (length > 0) ? @scanner.string.slice(old_pos, length) : +'' # warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}" # need to remove end of stream if out.last.is_a? Hash @@ -630,21 +630,21 @@ # # should be moved to the parser's workflow. # def serialize_objects_and_references obj_dir = {} - objid_cache = {} + objid_cache = {}.compare_by_identity # create a dictionary for referenced objects (no value resolution at this point) # at the same time, delete duplicates and old versions when objects have multiple versions @parsed.uniq! 
@parsed.length.times do |i| o = @parsed[i] - objid_cache[o.object_id] = i + objid_cache[o] = i tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]] if tmp_found = obj_dir[tmp_key] tmp_found.clear - @parsed[objid_cache[tmp_found.object_id]] = nil + @parsed[objid_cache[tmp_found]] = nil end obj_dir[tmp_key] = o end @parsed.compact! objid_cache.clear @@ -763,21 +763,21 @@ # new_data # end # end # # run block of code on every PDF object (PDF objects are class Hash) - # def each_object(object, limit_references = true, already_visited = {}, &block) + # def each_object(object, limit_references = true, already_visited = {}.compare_by_identity, &block) # unless limit_references - # already_visited[object.object_id] = true + # already_visited[object] = true # end # case # when object.is_a?(Array) # object.each {|obj| each_object(obj, limit_references, already_visited, &block)} # when object.is_a?(Hash) # yield(object) # unless limit_references && object[:is_reference_only] # object.each do |k,v| - # each_object(v, limit_references, already_visited, &block) unless already_visited[v.object_id] + # each_object(v, limit_references, already_visited, &block) unless already_visited[v] # end # end # end # end end