lib/data_frame.rb in davidrichards-data_frame-0.0.12 vs lib/data_frame.rb in davidrichards-data_frame-0.0.13
- old
+ new
@@ -1,10 +1,11 @@
require 'rubygems'
require 'activesupport'
require 'just_enumerable_stats'
require 'open-uri'
require 'fastercsv'
+require 'ostruct'
# Use a Dictionary if available
begin
require 'facets/dictionary'
rescue LoadError => e
@@ -55,10 +56,13 @@
end
contents ||= obj if obj.is_a?(String)
return nil unless contents
table = FCSV.parse(contents, default_csv_opts.merge(opts))
labels = table.shift
+ while table.last.empty?
+ table.pop
+ end
[labels, table]
end
# Baseline options for FCSV.parse; caller-supplied options are merged
# over these before parsing.
def default_csv_opts
  { :converters => :all }
end
end
@@ -69,10 +73,14 @@
rows.each do |row|
self.add_item(row)
end
end
# One-line summary of the frame: how many rows it holds and what its
# column labels are.
def inspect
  row_count = self.rows.size
  label_list = self.labels.inspect
  "DataFrame rows: " + row_count.to_s + " labels: " + label_list
end
+
# The labels of the data items
attr_reader :labels
alias :variables :labels
# The items stored in the frame
@@ -140,16 +148,110 @@
else
super
end
end
# Removes one or more columns from the frame, in place.
#
# labels - column names to remove. Arrays of names are accepted as well
#          (drop!([:a, :b]) behaves like drop!(:a, :b)); previously an
#          array argument matched no label and was silently ignored.
#          Names that do not match a column are skipped.
#
# Returns the frame itself so calls can be chained.
def drop!(*labels)
  labels.flatten.each do |label|
    drop_one!(label)
  end
  self
end
+
# Removes a single column: deletes the matching cell from every row and
# then the label itself.
#
# label - the column name. It is looked up as given first; if that finds
#         nothing and the value can be turned into a Symbol, the symbol
#         form is tried too, so String names work here just as they do
#         in validate_column.
#
# Returns the frame, or nil when no such column exists.
def drop_one!(label)
  position = self.labels.index(label)
  position ||= self.labels.index(label.to_sym) if label.respond_to?(:to_sym)
  return nil unless position

  self.items.each do |item|
    item.delete_at(position)
  end
  self.labels.delete_at(position)
  self
end
+ protected :drop_one!
+
# Overwrites every value in one column, in place, and returns the frame.
#
# column - name of an existing column; it is checked by validate_column.
# values - optional replacement values, consumed one per row in row
#          order.
# block  - used only when values is omitted: called with each current
#          value of the column, and its result becomes the new value.
#
# Raises ArgumentError when neither values nor a block is supplied
# (this used to surface as a NoMethodError on nil).
def replace!(column, values=nil, &block)
  column = validate_column(column)
  unless values
    raise ArgumentError, "Must provide replacement values or a block" unless block
    # map rather than map!: the array handed back by the column reader is
    # left untouched, and replace_column performs the actual write.
    values = self.send(column).map { |e| block.call(e) }
  end
  replace_column(column, values)
  self
end
+
# Writes values into the given column, row by row: row n receives
# values[n]. The existing row objects are updated in place and @items is
# then pointed at a new list holding those same rows.
def replace_column(column, values)
  position = self.labels.index(validate_column(column))
  updated = []
  self.items.each_with_index do |row, row_number|
    row[position] = values[row_number]
    updated.push(row)
  end
  @items = updated
end
+ protected :replace_column
+
# Normalises a column name to a Symbol and confirms the frame has a
# column by that name.
#
# Returns the Symbol. Raises ArgumentError when it is not one of the
# frame's labels.
def validate_column(column)
  name = column.to_sym
  return name if self.labels.include?(name)

  raise ArgumentError, "Must provide the name of an existing column. Provided #{name.inspect}, needed to provide one of #{self.labels.inspect}"
end
+ protected :validate_column
+
# Keeps only the rows for which the block returns a truthy value, in
# place, and returns the frame.
#
# as    - how each row is presented to the block: Array (the default,
#         the raw row), Hash, OpenStruct, or another class. A Symbol or
#         String is resolved to a class by infer_class first.
# block - called once per row with the converted row.
#
# Note, don't try this with a hash or open struct unless you have facets
# available.
#
# Raises ArgumentError when no block is given (this used to surface as a
# NoMethodError on nil).
def filter!(as=Array, &block)
  raise ArgumentError, "Must provide a block to filter rows with" unless block
  as = infer_class(as)
  # The row object itself is kept, not the converted form shown to the block.
  @items = self.items.select { |row| block.call(cast_row(row, as)) }
  self
end
+
# Resolves a class given by name. A Symbol or String is run through
# classify and constantize; anything else is returned unchanged.
def infer_class(obj)
  case obj
  when Symbol, String
    obj.to_s.classify.constantize
  else
    obj
  end
end
+ protected :infer_class
+
# Converts one row into the representation requested by +as+.
#
# row - the row's values, in label order.
# as  - Array:      the row is returned as is (same object).
#       Hash:       a new Hash mapping each label to its value.
#       OpenStruct: a new OpenStruct with one attribute per label.
#       otherwise:  as.new(row).
#
# The OpenStruct is built through its public constructor instead of by
# writing into OpenStruct#table, which is not public API in the standard
# library.
def cast_row(row, as)
  if as == Array
    row
  elsif as == Hash
    Hash[self.labels.zip(row)]
  elsif as == OpenStruct
    OpenStruct.new(Hash[self.labels.zip(row)])
  else
    as.new(row)
  end
end
+ protected :cast_row
+
# Creates a new data frame, only with the specified columns.
#
# cols - the labels to keep. Columns appear in the new frame in this
#        frame's label order, whatever order they are listed in here.
#
# Returns the new DataFrame; this frame is not modified.
def subset_from_columns(*cols)
  unwanted = self.labels.reject { |label| cols.include?(label) }
  new_data_frame = DataFrame.new(*self.labels)
  # Import copies of the rows: drop! deletes cells from the row objects
  # in place, so handing over this frame's own rows could strip the
  # dropped columns from this frame as well.
  new_data_frame.import(self.items.map { |row| row.dup })
  unwanted.each do |label|
    new_data_frame.drop!(label)
  end
  new_data_frame
end
end
\ No newline at end of file