lib/masticate/mender.rb in masticate-0.1.0 vs lib/masticate/mender.rb in masticate-0.1.1

- old
+ new

@@ -11,23 +11,40 @@ end def mend(opts) @output = opts[:output] ? File.open(opts[:output], "w") : $stdout @col_sep = opts[:col_sep] || ',' + @quote_char = opts[:quote_char] || "\0" expected_field_count = nil headers = nil @output_count = 0 with_input do |input| while (line = get) do unless line =~ /^\s*$/ - if !expected_field_count + if opts[:inlined] + row = explode(line) + ncells = row.count/2-1 + if !expected_field_count + headers = row[0..ncells] + expected_field_count = headers.count + emit(headers.to_csv(:col_sep => @col_sep)) + else + if row[0..ncells] != headers + raise "Header mismatch on line #{@input_count}\n Expected: #{headers.join(',')}\n Found: #{row[0..ncells].join(',')}" + end + end + row = row[ncells+1..-1] + emit(row.to_csv(:col_sep => @col_sep)) + elsif !expected_field_count # trust the first row - headers = explode(line) + headers = explode(line).map(&:strip) case opts[:snip] when Fixnum headers.shift(opts[:snip]) + when String + raise "TODO: snip named header. Multiple?" when nil # do nothing else raise "Do not understand snip instruction [#{opts[:snip].inspect}]" end @@ -41,11 +58,11 @@ line = line + ' ' + nextbit running_count = fieldcount(line) end end - if line.count(col_sep) > 2 + unless opts[:dejunk] && junky?(line) emit(line) end end end end @@ -58,20 +75,17 @@ :headers => headers } end def fieldcount(line) - if col_sep == ',' - CSV.parse_line(line).count - else - line.count(col_sep)+1 - end + explode(line).count end def explode(line) - if col_sep == ',' - CSV.parse_line(line).map(&:strip) - else - line.split(col_sep).map(&:strip) - end + CSV.parse_line(line, :col_sep => col_sep, :quote_char => @quote_char) + end + + # a line is "junky" if it has 2 or fewer fields with any content + def junky?(line) + explode(line).select {|s| s && !s.strip.empty?}.count <= 2 end end