lib/combine_pdf/parser.rb in combine_pdf-0.2.5 vs lib/combine_pdf/parser.rb in combine_pdf-0.2.6
- old
+ new
@@ -63,14 +63,17 @@
return @parsed unless @parsed.empty?
@scanner = StringScanner.new @string_to_parse
@scanner.pos = 0
if @scanner.scan /\%PDF\-[\d\-\.]+/
@version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
+ @scanner.skip_until /[\n\r]+/
+ # @scanner.skip /[^\d]*/
end
-
@parsed = _parse_
+ raise "Unknown PDF parsing error - maleformed PDF file?" unless (@parsed.select {|i| !i.is_a?(Hash)}).empty?
+
if @root_object == {}
xref_streams = @parsed.select {|obj| obj.is_a?(Hash) && obj[:Type] == :XRef}
xref_streams.each do |xref_dictionary|
@root_object.merge! xref_dictionary
end
@@ -173,12 +176,12 @@
str = @scanner.scan_until(/endstream/)
# raise error if the stream doesn't end.
raise "Parsing Error: PDF file error - a stream object wasn't properly colsed using 'endstream'!" unless str
# need to remove end of stream
if out.last.is_a? Hash
- out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
- # out.last[:raw_stream_content] = str.gsub(/[\n\r]?[\n\r]?endstream/, "")
+ # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
+ out.last[:raw_stream_content] = str.gsub(/[\n\r]?[\n\r]endstream\z/, "")
else
warn "Stream not attached to dictionary!"
out << str[0...-10].force_encoding(Encoding::ASCII_8BIT)
end
##########################################
@@ -321,10 +324,10 @@
@scanner.scan_until(/(trailer)|(\%EOF)/)
if @scanner.matched[-1] == 'r'
if @scanner.skip_until(/<</)
data = _parse_
- @root_object = {}
+ @root_object ||= {}
@root_object[data.shift] = data.shift while data[0]
end
##########
## skip until end of segment, marked by %%EOF
@scanner.skip_until(/\%\%EOF/)
\ No newline at end of file