lib/hexapdf/parser.rb in hexapdf-0.14.2 vs lib/hexapdf/parser.rb in hexapdf-0.14.3

- old
+ new

@@ -70,22 +70,28 @@
     # HexaPDF::XRefSection::Entry.
     def load_object(xref_entry)
      obj, oid, gen, stream =
        case xref_entry.type
        when :in_use
-          parse_indirect_object(xref_entry.pos)
+          if xref_entry.pos == 0 && xref_entry.oid != 0
+            # Handle seen-in-the-wild objects with invalid offset 0
+            maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
+            [nil, xref_entry.oid, xref_entry.gen, nil]
+          else
+            parse_indirect_object(xref_entry.pos)
+          end
        when :free
          [nil, xref_entry.oid, xref_entry.gen, nil]
        when :compressed
          load_compressed_object(xref_entry)
        else
          raise_malformed("Invalid cross-reference type '#{xref_entry.type}' encountered")
        end
 
      if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
        raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
-                          "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
+                        "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
      end
 
      @document.wrap(obj, oid: oid, gen: gen, stream: stream)
    rescue HexaPDF::MalformedPDFError
      reconstructed_revision.object(xref_entry)
@@ -131,11 +137,13 @@
          raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
        end
        tok1 = @tokenizer.next_byte
        tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
        if tok1 != 10 && tok1 != 13
-          raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
+          tok2 = @tokenizer.next_byte
+          maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
+                      force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
        elsif tok1 == 13 && tok2 != 10
          maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
                      pos: @tokenizer.pos)
          @tokenizer.pos -= 1
        end
@@ -388,17 +396,17 @@
      xref = XRefSection.new
      @tokenizer.pos = 0
      while true
        @tokenizer.skip_whitespace
        pos = @tokenizer.pos
-        @tokenizer.scan_until(/(\n|\r\n?)+/)
+        @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
        next_new_line_pos = @tokenizer.pos
        @tokenizer.pos = pos
-        token = @tokenizer.next_token rescue nil
+        token = @tokenizer.next_integer_or_keyword rescue nil
        if token.kind_of?(Integer)
-          gen = @tokenizer.next_token rescue nil
-          tok = @tokenizer.next_token rescue nil
+          gen = @tokenizer.next_integer_or_keyword rescue nil
+          tok = @tokenizer.next_integer_or_keyword rescue nil
          if @tokenizer.pos > next_new_line_pos
            @tokenizer.pos = next_new_line_pos
          elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
            xref.add_in_use_entry(token, gen, pos)
            @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
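
The first two hunks route recoverable problems (an in-use cross-reference entry with offset 0, and the keyword "stream" followed by a space before the end-of-line marker) through maybe_raise instead of raise_malformed; the third hunk hardens the cross-reference reconstruction fallback. The Ruby sketch below is not part of the diff: it illustrates how maybe_raise surfaces to users of the library via the 'parser.on_correctable_error' configuration hook. The config key, the hook's block arguments and the file name damaged.pdf are assumptions based on the hexapdf API around 0.14.x; verify against the installed version.

require 'hexapdf'

# Lenient mode (the default): correctable errors are patched up and parsing
# continues, e.g. an object whose xref entry points at offset 0 is treated
# like a free entry.
doc = HexaPDF::Document.open('damaged.pdf')
puts doc.pages.count

# Strict mode: returning a truthy value from the hook turns every correctable
# error into a HexaPDF::MalformedPDFError. (Assumed hook arguments: document,
# message, position.)
strict_config = {'parser.on_correctable_error' => proc {|_doc, _msg, _pos| true }}
begin
  HexaPDF::Document.open('damaged.pdf', config: strict_config)
rescue HexaPDF::MalformedPDFError => e
  warn "Strict parsing failed: #{e.message}"
end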