lib/masticate/mender.rb in masticate-0.1.0 vs lib/masticate/mender.rb in masticate-0.1.1
- old
+ new
@@ -11,23 +11,40 @@
end
def mend(opts)
@output = opts[:output] ? File.open(opts[:output], "w") : $stdout
@col_sep = opts[:col_sep] || ','
+ @quote_char = opts[:quote_char] || "\0"
expected_field_count = nil
headers = nil
@output_count = 0
with_input do |input|
while (line = get) do
unless line =~ /^\s*$/
- if !expected_field_count
+ if opts[:inlined]
+ row = explode(line)
+ ncells = row.count/2-1
+ if !expected_field_count
+ headers = row[0..ncells]
+ expected_field_count = headers.count
+ emit(headers.to_csv(:col_sep => @col_sep))
+ else
+ if row[0..ncells] != headers
+ raise "Header mismatch on line #{@input_count}\n Expected: #{headers.join(',')}\n Found: #{row[0..ncells].join(',')}"
+ end
+ end
+ row = row[ncells+1..-1]
+ emit(row.to_csv(:col_sep => @col_sep))
+ elsif !expected_field_count
# trust the first row
- headers = explode(line)
+ headers = explode(line).map(&:strip)
case opts[:snip]
when Fixnum
headers.shift(opts[:snip])
+ when String
+ raise "TODO: snip named header. Multiple?"
when nil
# do nothing
else
raise "Do not understand snip instruction [#{opts[:snip].inspect}]"
end
@@ -41,11 +58,11 @@
line = line + ' ' + nextbit
running_count = fieldcount(line)
end
end
- if line.count(col_sep) > 2
+ unless opts[:dejunk] && junky?(line)
emit(line)
end
end
end
end
@@ -58,20 +75,17 @@
:headers => headers
}
end
# Number of fields found on +line+.
# 0.1.0 (the "-" lines): used CSV.parse_line when col_sep was ',' and
# otherwise counted separator occurrences plus one.
# 0.1.1 (the "+" line): counts the cells returned by explode, so counting
# and splitting share one parsing path.
def fieldcount(line)
- if col_sep == ','
- CSV.parse_line(line).count
- else
- line.count(col_sep)+1
- end
+ explode(line).count
end
# Splits +line+ into an array of cells.
# 0.1.0 (the "-" lines): CSV.parse_line for ',' and String#split for any
# other separator; every cell was stripped in both branches.
# 0.1.1 (the "+" lines): always CSV.parse_line, passing col_sep and
# @quote_char (assigned in mend, default "\0"); cells are no longer
# stripped here, so callers that need trimmed values must strip them.
def explode(line)
- if col_sep == ','
- CSV.parse_line(line).map(&:strip)
- else
- line.split(col_sep).map(&:strip)
- end
+ CSV.parse_line(line, :col_sep => col_sep, :quote_char => @quote_char)
+ end
+
+ # a line is "junky" if 2 or fewer of its fields have any content (nil or whitespace-only cells do not count)
+ def junky?(line)
+ explode(line).select {|s| s && !s.strip.empty?}.count <= 2
end
end