module Daru class DataFrame attr_reader :vectors attr_reader :fields attr_reader :size attr_reader :name def initialize source, fields=[], name=SecureRandom.uuid if source.empty? @vectors = fields.inject({}){ |a,x| a[x]=Daru::Vector.new; a} else @vectors = source end @fields = fields.empty? ? source.keys.sort : fields @name = name check_length set_fields_order if @vectors.keys.sort != @fields.sort set_vector_names end def self.from_csv file, opts={} opts[:col_sep] ||= ',' opts[:headers] ||= true opts[:converters] ||= :numeric opts[:header_converters] ||= :symbol csv = CSV.open file, 'r', opts yield csv if block_given? first = true df = nil csv.each do |row| if first df = Daru::DataFrame.new({}, csv.headers) first = false end df.insert_row row end df end def column name @vectors[name] end def delete name @vectors.delete name @fields.delete name end # def filter_rows # end # def filter_columns # end def [](*name) unless name[1] return column(name[0]) end h = {} req_fields = @fields & name req_fields.each do |f| h[f] = @vectors[f] end DataFrame.new h, req_fields, @name end def ==(other) @name == other.name and @vectors == other.vectors and @size == other.size and @fields == other.fields end def []=(name, vector) insert_vector name, vector end def row index raise Exception, "Expected index to be within bounds" if index > @size row = [] self.each_column do |column| row << column[index] end row end def has_vector? vector !!@vectors[vector] end def each_row 0.upto(@size-1) do |index| yield row(index) end self end def each_row_with_index 0.upto(@size-1) do |index| yield row(index), index end self end def each_column @fields.each do |field| yield @vectors[field] end self end def each_column_with_name @fields.each do |field| yield @vectors[field], field end self end def insert_vector name, vector raise Exeception, "Expected vector size to be same as DataFrame\ size." if vector.size != self.size @vectors.merge({name => vector}) @fields << name end def insert_row row raise Exception, "Expected new row to same as the number of rows \ in the DataFrame" if row.size != @fields.size @fields.each_with_index do |field, index| @vectors[field] << row[index] end end def to_html(threshold=15) html = '
' + f.to_s + ' | ') } html += '
' + val.to_s + ' | ') } html += '
... | ') } html += '