lib/masticate/mender.rb in masticate-0.2.3 vs lib/masticate/mender.rb in masticate-0.3
- old
+ new
@@ -2,16 +2,80 @@
#
# A row that contains fewer delimiters than expected has been split across two lines
# (due to a newline embedded in a field). Glue those two lines into a single line in the output.
class Masticate::Mender < Masticate::Base
- attr_reader :col_sep
# Store the mender-specific options from opts and reset the per-run state
# that crunch relies on. standard_options is not defined in this view
# (presumably provided by Masticate::Base -- confirm there).
# In this diff view the "-" lines are the 0.2.3 initializer being replaced.
+ def configure(opts)
+ standard_options(opts)
- def initialize(filename)
- @filename = filename
# :inlined -- when truthy, crunch treats each row as header cells followed by value cells.
+ @inlined = opts[:inlined]
# :snip -- when numeric, crunch drops that many leading cells from the header row.
+ @snip = opts[:snip]
# :dejunk -- when truthy, crunch suppresses mended rows with at most two non-blank cells.
+ @dejunk = opts[:dejunk]
+
# Unknown until crunch has seen the header row.
+ @expected_field_count = nil
# Cells accumulated from short rows until a full row's worth is available.
+ @holding = []
end
+ # attr_reader :col_sep
+
+ # def initialize(filename)
+ # @filename = filename
+ # end
+
# Entry point whose name is unchanged from 0.2.3; in 0.3 it only forwards
# opts to execute. execute is not defined in this view (presumably inherited
# from Masticate::Base -- confirm there).
def mend(opts)
+ execute(opts)
+ end
+
# Process one parsed input row and return the row to output, or nil when
# nothing should be output for this call.
#
# The mode is chosen from state set in configure:
# * @inlined truthy  -- each row carries its own header cells (see crunch_inlined).
# * no @headers yet  -- the row is adopted as the header row (see adopt_headers).
# * otherwise        -- the row is accumulated until a full record is held (see glue_row).
#
# row may be nil; a nil row never produces output.
#
# Raises RuntimeError on an inlined header mismatch or an unsupported @snip value.
def crunch(row)
  if @inlined
    crunch_inlined(row)
  elsif !@headers
    adopt_headers(row)
  elsif row
    glue_row(row)
  end
end

# Internal helper for crunch: handle a row whose first half holds header
# cells and whose remainder holds the values. The first such row fixes
# @headers (and emits them); every later row must repeat the same headers.
# Returns the value cells, or nil for a nil row.
def crunch_inlined(row)
  return nil unless row

  ncells = row.count / 2 - 1
  leading = row[0..ncells]
  if !@headers
    @headers = leading
    @expected_field_count = @headers.count
    emit(@headers)
  elsif leading != @headers
    raise "Header mismatch on line #{@input_count}\n Expected: #{@headers.join(',')}\n Found: #{leading.join(',')}"
  end
  row[ncells + 1, @expected_field_count]
end

# Internal helper for crunch: trust the first row as the header row,
# optionally dropping @snip leading cells, and record the expected width.
# Returns the (possibly snipped) header row.
def adopt_headers(row)
  # Empty input: there is no header row to adopt, so produce no output
  # instead of failing on nil.count below.
  return nil if row.nil?

  @headers = row
  case @snip
  when Integer
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # removed in 3.2, where merely evaluating this case raised NameError.
    @headers.shift(@snip)
  when String
    raise "TODO: snip named header. Multiple?"
  when nil
    # nothing to snip
  else
    raise "Do not understand snip instruction [#{@snip.inspect}]"
  end
  @expected_field_count = @headers.count
  @headers
end

# Internal helper for crunch: append the row's cells to the held cells and
# release them once at least @expected_field_count cells are available.
# Returns nil while the record is still incomplete, or when @dejunk is set
# and the completed record has at most two non-blank cells.
def glue_row(row)
  @holding += row
  return nil if @holding.count < @expected_field_count

  complete = @holding
  @holding = []
  return nil if @dejunk && complete.count { |s| s && !s.strip.empty? } <= 2

  complete
end
+
+ def old_mend(opts)
@output = opts[:output] ? File.open(opts[:output], "w") : $stdout
@col_sep = opts[:col_sep] || ','
@quote_char = opts[:quote_char] || "\0"
expected_field_count = nil