lib/combine_pdf/parser.rb in combine_pdf-1.0.20 vs lib/combine_pdf/parser.rb in combine_pdf-1.0.21
- old
+ new
@@ -356,28 +356,38 @@
##########################################
## parse a Stream
##########################################
elsif @scanner.scan(/stream[ \t]*[\r\n]/)
@scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
+ # advance by the publshed stream length (if any)
+ old_pos = @scanner.pos
+ if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length].to_i > 2)
+ @scanner.pos += out.last[:Length].to_i - 2
+ end
+
# the following was dicarded because some PDF files didn't have an EOL marker as required
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
# instead, a non-strict RegExp is used:
- str = @scanner.scan_until(/endstream/)
+
# raise error if the stream doesn't end.
- unless str
+ unless @scanner.skip_until(/endstream/)
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
end
+ length = @scanner.pos - (old_pos + 9)
+ length = 0 if(length < 0)
+ length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
+ length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
+ str = (length > 0) ? @scanner.string.slice(old_pos, length) : ''
# warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
# need to remove end of stream
if out.last.is_a? Hash
- # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
- out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
+ out.last[:raw_stream_content] = unify_string str.force_encoding(Encoding::ASCII_8BIT)
else
warn 'Stream not attached to dictionary!'
- out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
+ out << str.force_encoding(Encoding::ASCII_8BIT)
end
##########################################
## parse an Object after finished
##########################################
elsif str = @scanner.scan(/endobj/)