lib/combine_pdf/parser.rb in combine_pdf-0.2.29 vs lib/combine_pdf/parser.rb in combine_pdf-0.2.30
- old
+ new
@@ -60,10 +60,11 @@
def parse
return [] if @string_to_parse.empty?
return @parsed unless @parsed.empty?
@scanner = StringScanner.new @string_to_parse
@scanner.pos = 0
+ @scanner.skip(/[^%]*/) if @scanner.exist?(/%PDF/i)
if @scanner.scan /\%PDF\-[\d\-\.]+/
@version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
loop do
break unless @scanner.scan(/[^\d\r\n]+/)
break if @scanner.check(/([\d]+[\s]+[\d]+[\s]+obj[\n\r\s]+\<\<)|([\n\r]+)/)
@@ -353,11 +354,11 @@
elsif @scanner.scan(/null/)
out << nil
##########################################
## XREF - check for encryption... anything else?
##########################################
- elsif @scanner.scan(/xref/)
+ elsif @scanner.scan(/(startxref)|(xref)/)
##########
## get root object to check for encryption
@scanner.scan_until(/(trailer)|(\%EOF)/)
fresh = true
if @scanner.matched[-1] == 'r'
@@ -397,11 +398,11 @@
out << keep.pop
end
fresh = false
else
# always advance
- # warn "Advancing for unknown reason... #{@scanner.string[@scanner.pos-4, 8]} ... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
+ # warn "Advancing for unknown reason... #{@scanner.string[@scanner.pos - 4, 8]} ... #{@scanner.peek(4)}" unless @scanner.peek(1) =~ /[\s\n]/
warn 'Warning: parser advancing for unknown reason. Potential data-loss.'
@scanner.pos = @scanner.pos + 1
end
end
out
@@ -416,10 +417,11 @@
if root_object[:Root]
catalogs = root_object[:Root][:referenced_object] || root_object[:Root]
else
catalogs = (@parsed.select { |obj| obj[:Type] == :Catalog }).last
end
- @parsed.delete_if { |obj| obj[:Type] == :Catalog }
+
+ @parsed.delete_if { |obj| obj.nil? || obj[:Type] == :Catalog }
@parsed << catalogs
raise "Unknown error - parsed data doesn't contain a cataloged object!" unless catalogs
end
if catalogs.is_a?(Array)