lib/combine_pdf/parser.rb in combine_pdf-1.0.20 vs lib/combine_pdf/parser.rb in combine_pdf-1.0.21

- old
+ new

@@ -356,28 +356,38 @@ ########################################## ## parse a Stream ########################################## elsif @scanner.scan(/stream[ \t]*[\r\n]/) @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze + # advance by the publshed stream length (if any) + old_pos = @scanner.pos + if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length].to_i > 2) + @scanner.pos += out.last[:Length].to_i - 2 + end + # the following was dicarded because some PDF files didn't have an EOL marker as required # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/) # instead, a non-strict RegExp is used: - str = @scanner.scan_until(/endstream/) + # raise error if the stream doesn't end. - unless str + unless @scanner.skip_until(/endstream/) raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" end + length = @scanner.pos - (old_pos + 9) + length = 0 if(length < 0) + length -= 1 if(@scanner.string[old_pos + length - 1] == "\n") + length -= 1 if(@scanner.string[old_pos + length - 1] == "\r") + str = (length > 0) ? @scanner.string.slice(old_pos, length) : '' # warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}" # need to remove end of stream if out.last.is_a? Hash - # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r) - out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT) + out.last[:raw_stream_content] = unify_string str.force_encoding(Encoding::ASCII_8BIT) else warn 'Stream not attached to dictionary!' - out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT) + out << str.force_encoding(Encoding::ASCII_8BIT) end ########################################## ## parse an Object after finished ########################################## elsif str = @scanner.scan(/endobj/)