lib/hexapdf/parser.rb in hexapdf-0.14.4 vs lib/hexapdf/parser.rb in hexapdf-0.15.0

- old (hexapdf-0.14.4)
+ new (hexapdf-0.15.0)

@@ -138,15 +138,17 @@
       if tok.kind_of?(Tokenizer::Token) && tok == 'stream'
         unless object.kind_of?(Hash)
           raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
         end
         tok1 = @tokenizer.next_byte
-        tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
+        if tok1 == 32 # space
+          maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
+          tok1 = @tokenizer.next_byte
+        end
+        tok2 = @tokenizer.next_byte if tok1 == 13 # CR
         if tok1 != 10 && tok1 != 13
-          tok2 = @tokenizer.next_byte
-          maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
-                      force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
+          raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
         elsif tok1 == 13 && tok2 != 10
           maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
                       pos: @tokenizer.pos)
           @tokenizer.pos -= 1
         end

@@ -212,11 +214,16 @@
         else
           obj = load_object(XRefSection.in_use_entry(0, 0, pos))
           unless obj.respond_to?(:xref_section)
             raise_malformed("Object is not a cross-reference stream", pos: pos)
           end
-          xref_section = obj.xref_section
+          begin
+            xref_section = obj.xref_section
+          rescue MalformedPDFError => e
+            e.pos = pos
+            raise
+          end
           trailer = obj.trailer
           unless xref_section.entry?(obj.oid, obj.gen)
             maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
             xref_section.add_in_use_entry(obj.oid, obj.gen, pos)
           end

@@ -399,10 +406,11 @@
       msg = "#{$!} - trying cross-reference table reconstruction"
       @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)

       xref = XRefSection.new
       @tokenizer.pos = 0
+      linearized = nil
       while true
         @tokenizer.skip_whitespace
         pos = @tokenizer.pos
         @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
         next_new_line_pos = @tokenizer.pos

@@ -414,16 +422,20 @@
           tok = @tokenizer.next_integer_or_keyword rescue nil
           if @tokenizer.pos > next_new_line_pos
             @tokenizer.pos = next_new_line_pos
           elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
             xref.add_in_use_entry(token, gen, pos)
+            if linearized.nil?
+              obj = @tokenizer.next_object rescue nil
+              linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
+            end
             @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
           end
         elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
           obj = @tokenizer.next_object rescue nil
           # Use last trailer found in case of multiple revisions but use first trailer in case of
           # linearized file.
-          trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
+          trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
         elsif token == Tokenizer::NO_MORE_TOKENS
           break
         else
           @tokenizer.pos = next_new_line_pos
         end
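
Notes on the changes above. The first hunk changes handling of the byte after the "stream" keyword: a single space is now reported through the 'parser.on_correctable_error' configuration hook (via maybe_raise) and parsing continues, while anything else that is not LF or CR/LF now unconditionally raises a MalformedPDFError via raise_malformed instead of being conditionally recoverable. The second hunk attaches the file position to errors raised while decoding a cross-reference stream. The last two hunks make cross-reference reconstruction detect a /Linearized dictionary in the first reconstructed object and, for linearized files, keep the first trailer found rather than relying on the presence of a /Prev key.

The following is a minimal sketch of how calling code could observe correctable errors such as the new "Keyword stream followed by space instead of LF or CR/LF" message. The file name and handler are hypothetical, and it assumes HexaPDF::Document.open forwards the config: keyword to Document.new:

  require 'hexapdf'

  # Hypothetical handler: log correctable parser errors and continue.
  # Returning a truthy value would make the parser raise instead.
  on_error = lambda do |document, message, position|
    warn "correctable parser error at byte #{position}: #{message}"
    false
  end

  doc = HexaPDF::Document.open('possibly-malformed.pdf',
                               config: {'parser.on_correctable_error' => on_error})
  puts doc.pages.count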