lib/masticate/mender.rb in masticate-0.0.3 vs lib/masticate/mender.rb in masticate-0.0.4

- old
+ new

@@ -2,48 +2,62 @@
 #
 # A row that contains fewer delimiters than expected has been split across two lines
 # (due to a newline embedded in a field). Glue those two lines into a single line in the output.
 
 class Masticate::Mender
-  attr_reader :file
+  attr_reader :input
 
-  def initialize(file)
-    @file = file
+  def initialize(filename)
+    @input = open(filename)
   end
 
   def mend(opts)
-    output = opts[:output]
+    @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
     col_sep = opts[:col_sep]
 
-    expected_count = nil
-    @input_count = output_count = 0
+    expected_delim_count = nil
+    @input_count = @output_count = 0
     while (line = get) do
-      if !expected_count
-        # trust the first row
-        expected_count = line.count(col_sep)
-      else
-        running_count = line.count(col_sep)
-        until line.nil? || running_count >= expected_count
-          nextbit = get
-          if nextbit
-            line = line.chomp + ' ' + nextbit
-            running_count = line.count(col_sep)
-          else
-            line = nil
+      unless line =~ /^\s*$/
+        if !expected_delim_count
+          # trust the first row
+          expected_delim_count = line.count(col_sep)
+        else
+          running_count = line.count(col_sep)
+          while !input.eof? && running_count < expected_delim_count do
+            nextbit = get
+            if nextbit
+              line = line + ' ' + nextbit
+              running_count = line.count(col_sep)
+            end
           end
         end
+        if line.count(col_sep) > 2
+          emit(line)
+        end
       end
-      output_count += 1
-      output << line
     end
+    @input.close
+    @output.close if opts[:output]
 
     {
       :input_records => @input_count,
-      :output_records => output_count
+      :output_records => @output_count
     }
   end
 
   def get
-    (line = file.gets) && @input_count += 1
-    line
+    line = input.gets
+    @input_count += 1
+    line && line.chomp
+  end
+
+  def emit(line)
+    @output_count += 1
+    begin
+      @output.puts line
+    rescue Errno::EPIPE
+      # output was closed, e.g. ran piped into `head`
+      # silently ignore this condition, it's not fatal and doesn't need a warning
+    end
   end
 end