lib/masticate/mender.rb in masticate-0.3.1 vs lib/masticate/mender.rb in masticate-0.3.2
- old
+ new
@@ -10,24 +10,18 @@
@inlined = opts[:inlined]
@snip = opts[:snip]
@dejunk = opts[:dejunk]
@expected_field_count = nil
- @holding = []
+ @holding = ''
end
- # attr_reader :col_sep
-
- # def initialize(filename)
- # @filename = filename
- # end
-
# Entry point for mending: forwards opts unchanged to execute and returns
# whatever execute returns.
# NOTE(review): execute is defined outside this view; presumably it drives the
# read loop and hands each parsed row to crunch. Confirm against its definition.
def mend(opts)
execute(opts)
end
- def crunch(row)
+ def crunch(row, line = '', csv_options = {})
if @inlined
if row
ncells = row.count/2-1
if !@headers
@headers = row[0..ncells]
@@ -54,97 +48,31 @@
raise "Do not understand snip instruction [#{@snip.inspect}]"
end
@expected_field_count = @headers.count
row = @headers
elsif row
- @holding += row
- if @holding.count < @expected_field_count
+ @holding << ' ' unless @holding.empty?
+ @holding << line
+
+ row = CSV.parse_line(@holding, csv_options) #.map {|s| s && s.strip}
+ if row
+ row = row.map {|s| s && s.strip}
+ end
+
+ if row.count < @expected_field_count
# incomplete row; do not emit anything
row = nil
else
- row = @holding
- @holding = []
+ @holding = ''
end
if @dejunk && row && row.select {|s| s && !s.strip.empty?}.count <= 2
# junky row, suppress output
nil
else
row
end
end
- end
-
- def old_mend(opts)
- @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
- @col_sep = opts[:col_sep] || ','
- @quote_char = opts[:quote_char] || "\0"
-
- expected_field_count = nil
- headers = nil
- @output_count = 0
- fieldcounts = Hash.new(0)
- with_input do |input|
- while (line = get) do
- unless line =~ /^\s*$/
- if opts[:inlined]
- row = explode(line)
- ncells = row.count/2-1
- if !expected_field_count
- headers = row[0..ncells]
- expected_field_count = headers.count
- fieldcounts[headers.count] += 1
- emit(headers.to_csv(:col_sep => @col_sep))
- else
- if row[0..ncells] != headers
- raise "Header mismatch on line #{@input_count}\n Expected: #{headers.join(',')}\n Found: #{row[0..ncells].join(',')}"
- end
- end
- row = row[ncells+1, expected_field_count]
- fieldcounts[row.count] += 1
- emit(row.to_csv(:col_sep => @col_sep))
- elsif !expected_field_count
- # trust the first row
- headers = explode(line).map(&:strip)
- case opts[:snip]
- when Fixnum
- headers.shift(opts[:snip])
- when String
- raise "TODO: snip named header. Multiple?"
- when nil
- # do nothing
- else
- raise "Do not understand snip instruction [#{opts[:snip].inspect}]"
- end
- expected_field_count = headers.count
- fieldcounts[headers.count] += 1
- emit(headers.to_csv(:col_sep => @col_sep))
- else
- running_count = fieldcount(line)
- while !input.eof? && running_count < expected_field_count do
- nextbit = get
- if nextbit
- line = line + ' ' + nextbit
- running_count = fieldcount(line)
- end
- end
-
- unless opts[:dejunk] && junky?(line)
- fieldcounts[fieldcount(line)] += 1
- emit(line)
- end
- end
- end
- end
- end
-
- @output.close if opts[:output]
- {
- :input_count => @input_count,
- :output_count => @output_count,
- :field_counts => fieldcounts,
- :headers => headers
- }
end
# Returns the number of fields in a raw line, i.e. the element count of
# whatever explode(line) yields.
# NOTE(review): explode is defined outside this view; presumably it splits the
# line on the configured column separator. Confirm against its definition.
def fieldcount(line)
explode(line).count
end