lib/combine_pdf/parser.rb in combine_pdf-0.2.5 vs lib/combine_pdf/parser.rb in combine_pdf-0.2.6

- old
+ new

@@ -63,14 +63,17 @@ return @parsed unless @parsed.empty? @scanner = StringScanner.new @string_to_parse @scanner.pos = 0 if @scanner.scan /\%PDF\-[\d\-\.]+/ @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f + @scanner.skip_until /[\n\r]+/ + # @scanner.skip /[^\d]*/ end - @parsed = _parse_ + raise "Unknown PDF parsing error - maleformed PDF file?" unless (@parsed.select {|i| !i.is_a?(Hash)}).empty? + if @root_object == {} xref_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :XRef} xref_streams.each do |xref_dictionary| @root_object.merge! xref_dictionary end @@ -173,12 +176,12 @@ str = @scanner.scan_until(/endstream/) # raise error if the stream doesn't end. raise "Parsing Error: PDF file error - a stream object wasn't properly colsed using 'endstream'!" unless str # need to remove end of stream if out.last.is_a? Hash - out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r) - # out.last[:raw_stream_content] = str.gsub(/[\n\r]?[\n\r]?endstream/, "") + # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r) + out.last[:raw_stream_content] = str.gsub(/[\n\r]?[\n\r]endstream\z/, "") else warn "Stream not attached to dictionary!" out << str[0...-10].force_encoding(Encoding::ASCII_8BIT) end ########################################## @@ -321,10 +324,10 @@ @scanner.scan_until(/(trailer)|(\%EOF)/) if @scanner.matched[-1] == 'r' if @scanner.skip_until(/<</) data = _parse_ - @root_object = {} + @root_object ||= {} @root_object[data.shift] = data.shift while data[0] end ########## ## skip untill end of segment, maked by %%EOF @scanner.skip_until(/\%\%EOF/) \ No newline at end of file