module SSE
  module Impl
    class BufferedLineReader
      #
      # Reads a series of data chunks from an enumerator, and returns an enumerator that
      # parses/aggregates these into text lines. The line terminator may be CR, LF, or
      # CRLF for each line; terminators are not included in the returned lines. When the
      # input data runs out, the output enumerator ends and does not include any partially
      # completed line.
      #
      # The parsing state (buffer and scan positions) is created once per call to this
      # method, outside of the returned enumerator's block, so it is shared by every
      # enumeration of the returned enumerator.
      #
      # @param [Enumerator] chunks an enumerator that will yield strings from a stream -
      #   these are treated as raw UTF-8 bytes, regardless of the string's declared encoding
      #   (so it is OK if a multi-byte character is split across chunks); the chunk's declared
      #   encoding is set to ASCII-8BIT in place, unless the chunk is frozen, in which case a
      #   binary copy is used and the chunk is left untouched
      # @return [Enumerator] an enumerator that will yield one line at a time in UTF-8
      #
      def self.lines_from(chunks)
        buffer = "".b             # raw bytes received but not yet emitted as lines
        position = 0              # where the next scan for a line break resumes
        line_start = 0            # where the line currently being accumulated begins
        last_char_was_cr = false  # true if the previous chunk ended exactly on a CR

        Enumerator.new do |gen|
          chunks.each do |chunk|
            # Work on raw bytes. A frozen chunk cannot be re-tagged in place, so copy it.
            bytes = chunk.frozen? ? chunk.b : chunk.force_encoding(Encoding::BINARY)
            buffer << bytes

            loop do
              # Search for a line break in any part of the buffer that we haven't yet seen.
              i = buffer.index(/[\r\n]/, position)
              if i.nil?
                # There isn't a line break yet, so we'll keep accumulating data in the buffer,
                # using position to keep track of where we left off scanning. We can also
                # discard any previously parsed lines from the buffer at this point.
                if line_start > 0
                  buffer.slice!(0, line_start)
                  line_start = 0
                end
                position = buffer.length
                break
              end

              ch = buffer[i]
              if i == 0 && ch == "\n" && last_char_was_cr
                # This is just the dangling LF of a CRLF pair
                last_char_was_cr = false
                i += 1
                position = i
                line_start = i
                next
              end

              # Calling force_encoding just declares that we believe the encoding of this
              # string to be UTF-8 (which is the only encoding allowed in the SSE protocol);
              # it doesn't cause any re-decoding of the string. The previous line-parsing
              # steps were done on raw 8-bit strings so that it won't try to do any UTF-8
              # decoding on intermediate slices.
              line = buffer[line_start, i - line_start].force_encoding(Encoding::UTF_8)

              last_char_was_cr = false
              i += 1
              if ch == "\r"
                if i == buffer.length
                  # We'll break the line here, but be on watch for a dangling LF
                  last_char_was_cr = true
                elsif buffer[i] == "\n"
                  i += 1
                end
              end
              if i == buffer.length
                # Everything has been consumed. Empty the existing buffer rather than
                # rebinding it to a string literal: the literal would be UTF-8 instead of
                # binary, and appending to it fails under frozen_string_literal.
                buffer.clear
                i = 0
              end
              position = i
              line_start = i
              gen.yield line
            end
          end
        end
      end
    end
  end
end