lib/io_streams/delimited/reader.rb in iostreams-0.8.2 vs lib/io_streams/delimited/reader.rb in iostreams-0.9.0
- old
+ new
@@ -3,11 +3,11 @@
class Reader
attr_accessor :delimiter
# Read from a file or stream
def self.open(file_name_or_io, options={}, &block)
- if file_name_or_io.respond_to?(:read)
+ if IOStreams.reader_stream?(file_name_or_io)
block.call(new(file_name_or_io, options))
else
::File.open(file_name_or_io, 'rb') do |io|
block.call(new(io, options))
end
@@ -23,30 +23,28 @@
# Parameters
# input_stream
# The input stream that implements #read
#
# options
- # :delimiter[Symbol|String]
+ # :delimiter[String]
# Line / Record delimiter to use to break the stream up into records
- # nil
- # Automatically detect line endings and break up by line
- # Searches for the first "\r\n" or "\n" and then uses that as the
- # delimiter for all subsequent records
- # String:
- # Any string to break the stream up by
- # The records when saved will not include this delimiter
+ # Any string to break the stream up by
+ # The records when saved will not include this delimiter
# Default: nil
+ # Automatically detect line endings and break up by line
+ # Searches for the first "\r\n" or "\n" and then uses that as the
+ # delimiter for all subsequent records
#
# :buffer_size [Integer]
# Maximum size of the buffer into which to read the stream into for
# processing.
# Must be large enough to hold the entire first line and its delimiter(s)
# Default: 65536 ( 64K )
#
# :strip_non_printable [true|false]
# Strip all non-printable characters read from the file
- # Default: true iff :encoding is UTF8_ENCODING, otherwise false
+ # Default: false
#
# :encoding
# Force encoding to this encoding for all data being read
# Default: UTF8_ENCODING
# Set to nil to disable encoding
@@ -63,11 +61,11 @@
@delimiter.force_encoding(UTF8_ENCODING) if @delimiter && @encoding
@buffer = ''
end
# Returns each line at a time to to the supplied block
- def each_line(&block)
+ def each(&block)
partial = nil
loop do
if read_chunk == 0
block.call(partial) if partial
return
@@ -87,21 +85,41 @@
end
@buffer = partial.nil? ? '' : partial
end
end
+ alias_method :each_line, :each
+
+ # Reads length bytes from the I/O stream.
+ # Not recommended, but available if someone calls #read on this delimited reader
+ def read(length = nil, outbuf = nil)
+ if length
+ while (@buffer.size < length) && (read_chunk > 0)
+ end
+ data = @buffer.slice!(0, length)
+ outbuf << data if outbuf
+ data
+ else
+ while read_chunk > 0
+ end
+ @buffer
+ end
+ end
+
##########################################################################
private
+ NOT_PRINTABLE = Regexp.compile(/[^[:print:]|\r|\n]/)
+
# Returns [Integer] the number of bytes read into the internal buffer
# Returns 0 on EOF
def read_chunk
chunk = @input_stream.read(@buffer_size)
# EOF reached?
return 0 unless chunk
# Strip out non-printable characters before converting to UTF-8
- chunk = chunk.scan(/[[:print:]]|\r|\n/).join if @strip_non_printable
+ chunk.gsub!(NOT_PRINTABLE, '') if @strip_non_printable
@buffer << (@encoding ? chunk.force_encoding(@encoding) : chunk)
chunk.size
end