lib/io_streams/line/reader.rb in iostreams-1.5.1 vs lib/io_streams/line/reader.rb in iostreams-1.6.0

- old
+ new

@@ -36,16 +36,16 @@ # # buffer_size: [Integer] # Size of blocks to read from the input stream at a time. # Default: 65536 ( 64K ) # - # TODO: - # - Handle embedded line feeds when reading csv files. - # - Skip Comment lines. RegExp? - # - Skip "empty" / "blank" lines. RegExp? - # - Extract header line(s) / first non-comment, non-blank line - # - Embedded newline support, RegExp? or Proc? + # embedded_within: [String] + # Supports CSV files where a line may contain an embedded newline. + # For CSV files set `embedded_within: '"'` + # + # Note: + # * When using a line reader and the file_name ends with ".csv" then embedded_within is automatically set to `"` def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil, original_file_name: nil) super(input_stream) @embedded_within = embedded_within @buffer_size = buffer_size @@ -84,20 +84,32 @@ end end line_count end - # Reads each line per the @delimeter. It will account for embedded lines provided they are within double quotes. - # The embedded_within argument is set in IOStreams::LineReader + # Reads each line per the `delimiter`. + # Accounts for lines that contain the `delimiter` when the `delimiter` is within the `embedded_within` delimiter. + # For example, CSV files can contain newlines embedded within double quotes. def readline line = _readline if line && @embedded_within initial_line_number = @line_number while line.count(@embedded_within).odd? - raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10 - + if eof? || line.length > @buffer_size * 10 + raise(Errors::MalformedDataError.new( + "Unbalanced delimited field, delimiter: #{@embedded_within}", + initial_line_number + )) + end line << @delimiter - line << _readline + next_line = _readline + if next_line.nil? + raise(Errors::MalformedDataError.new( + "Unbalanced delimited field, delimiter: #{@embedded_within}", + initial_line_number + )) + end + line << next_line end end line end