lib/masticate/mender.rb in masticate-0.1.1 vs lib/masticate/mender.rb in masticate-0.1.3

- old
+ new

@@ -16,26 +16,29 @@ @quote_char = opts[:quote_char] || "\0" expected_field_count = nil headers = nil @output_count = 0 + fieldcounts = Hash.new(0) with_input do |input| while (line = get) do unless line =~ /^\s*$/ if opts[:inlined] row = explode(line) ncells = row.count/2-1 if !expected_field_count headers = row[0..ncells] expected_field_count = headers.count + fieldcounts[headers.count] += 1 emit(headers.to_csv(:col_sep => @col_sep)) else if row[0..ncells] != headers raise "Header mismatch on line #{@input_count}\n Expected: #{headers.join(',')}\n Found: #{row[0..ncells].join(',')}" end end - row = row[ncells+1..-1] + row = row[ncells+1, expected_field_count] + fieldcounts[row.count] += 1 emit(row.to_csv(:col_sep => @col_sep)) elsif !expected_field_count # trust the first row headers = explode(line).map(&:strip) case opts[:snip] @@ -47,10 +50,11 @@ # do nothing else raise "Do not understand snip instruction [#{opts[:snip].inspect}]" end expected_field_count = headers.count + fieldcounts[headers.count] += 1 emit(headers.to_csv(:col_sep => @col_sep)) else running_count = fieldcount(line) while !input.eof? && running_count < expected_field_count do nextbit = get @@ -59,10 +63,11 @@ running_count = fieldcount(line) end end unless opts[:dejunk] && junky?(line) + fieldcounts[fieldcount(line)] += 1 emit(line) end end end end @@ -70,19 +75,20 @@ @output.close if opts[:output] { :input_count => @input_count, :output_count => @output_count, + :field_counts => fieldcounts, :headers => headers } end def fieldcount(line) explode(line).count end def explode(line) - CSV.parse_line(line, :col_sep => col_sep, :quote_char => @quote_char) + CSV.parse_line(line, :col_sep => @col_sep, :quote_char => @quote_char) end # a line is "junky" if it has 2 or fewer fields with any content def junky?(line) explode(line).select {|s| s && !s.strip.empty?}.count <= 2