parser.rb in combine_pdf-1.0.21

- old
+ new
@@ -356,28 +356,38 @@
         ##########################################
         ## parse a Stream
         ##########################################
         elsif @scanner.scan(/stream[ \t]*[\r\n]/)
           @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
+          # advance by the publshed stream length (if any)
+          old_pos = @scanner.pos
+          if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length].to_i > 2)
+            @scanner.pos += out.last[:Length].to_i - 2
+          end
+
           # the following was dicarded because some PDF files didn't have an EOL marker as required
           # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
           # instead, a non-strict RegExp is used:
-          str = @scanner.scan_until(/endstream/)
+          
 
           # raise error if the stream doesn't end.
-          unless str
+          unless @scanner.skip_until(/endstream/)
             raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
           end
+          length = @scanner.pos - (old_pos + 9)
+          length = 0 if(length < 0)
+          length -= 1 if(@scanner.string[old_pos + length - 1] == "\n") 
+          length -= 1 if(@scanner.string[old_pos + length - 1] == "\r") 
+          str = (length > 0) ? @scanner.string.slice(old_pos, length) : ''
 
           # warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
 
           # need to remove end of stream
           if out.last.is_a? Hash
-            # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
-            out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
+            out.last[:raw_stream_content] = unify_string str.force_encoding(Encoding::ASCII_8BIT)
           else
             warn 'Stream not attached to dictionary!'
-            out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
+            out << str.force_encoding(Encoding::ASCII_8BIT)
           end
         ##########################################
         ## parse an Object after finished
         ##########################################
         elsif str = @scanner.scan(/endobj/)