lib/picky/sources/csv.rb in picky-0.0.5 vs lib/picky/sources/csv.rb in picky-0.0.6
- old
+ new
@@ -1,29 +1,42 @@
+require 'CSV' # NOTE(review): the stdlib file is csv.rb; 'CSV' presumably loads only on case-insensitive filesystems - confirm
+
module Sources
+ class NoCSVFileGiven < StandardError; end # Raised by CSV#initialize when the options carry no :file entry.
+
class CSV < Base
- attr_reader :file_name
+ attr_reader :file_name, :field_names
- def initialize file_name, *field_names
- @file_name = file_name
- @field_names
+ def initialize *field_names, options # NOTE(review): the last argument is always bound to options; without a hash the last field name is consumed
+ @field_names = field_names
+ @file_name = Hash === options && options[:file] || raise_no_file_given(field_names) # non-Hash options or a nil/false :file raises
end
- # Counts all the entries that are used for the index.
#
- def count type
- `wc -l #{file_name}`
+ # Raises NoCSVFileGiven with the comma-separated field names as its message.
+ def raise_no_file_given field_names
+ raise NoCSVFileGiven.new field_names.join(', ')
end
- # Harvests the data to index, chunked.
+ # Harvests the data to index: yields the id and the text of the given field for every row that has text.
#
- # Subclasses should override harvest_statement to define how their data is found.
- # Example:
- # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
+ def harvest _, field # the first argument is ignored
+ index = field_names.index field.name # NOTE(review): nil if field.name is not in field_names; ary[nil] below would raise TypeError
+ get_data do |ary|
+ indexed_id = ary.shift.to_i # first column is the id; shift removes it, so index applies to the remaining columns
+ text = ary[index]
+ next unless text # skip rows with no value (nil) in this column
+ text.force_encoding 'utf-8' # Retags the string as UTF-8 in place (no transcoding). TODO: Still needed?
+ yield indexed_id, text
+ end
+ end
+
#
- def harvest offset
- File.open file_name, 'r'
+ # Reads file_name with ::CSV.foreach, handing each row to the given block.
+ def get_data &block
+ ::CSV.foreach file_name, &block
end
end
end
\ No newline at end of file