lib/daru/dataframe.rb in daru-0.1.2 vs lib/daru/dataframe.rb in daru-0.1.3
- old
+ new
@@ -1,16 +1,13 @@
-$:.unshift File.dirname(__FILE__)
+require 'daru/accessors/dataframe_by_row.rb'
+require 'daru/maths/arithmetic/dataframe.rb'
+require 'daru/maths/statistics/dataframe.rb'
+require 'daru/plotting/dataframe.rb'
+require 'daru/io/io.rb'
-require 'accessors/dataframe_by_row.rb'
-require 'maths/arithmetic/dataframe.rb'
-require 'maths/statistics/dataframe.rb'
-require 'plotting/dataframe.rb'
-require 'io/io.rb'
-
module Daru
class DataFrame
-
include Daru::Maths::Arithmetic::DataFrame
include Daru::Maths::Statistics::DataFrame
include Daru::Plotting::DataFrame if Daru.has_nyaplot?
class << self
@@ -113,35 +110,34 @@
end
# Create DataFrame by specifying rows as an Array of Arrays or Array of
# Daru::Vector objects.
def rows source, opts={}
- df = nil
- if source.all? { |v| v.size == source[0].size }
- first = source[0]
- index = []
- opts[:order] ||=
- if first.is_a?(Daru::Vector) # assume that all are Vectors
- source.each { |vec| index << vec.name }
+ first = source.first
+
+ raise SizeError, 'All vectors must have same length' \
+ unless source.all? { |v| v.size == first.size }
+
+ index = []
+ opts[:order] ||=
+ case first
+ when Daru::Vector # assume that all are Vectors
+ index = source.map(&:name)
first.index.to_a
- elsif first.is_a?(Array)
- Array.new(first.size) { |i| i.to_s }
+ when Array
+ Array.new(first.size, &:to_s)
end
- if source.all? { |s| s.is_a?(Array) }
- df = Daru::DataFrame.new(source.transpose, opts)
- else # array of Daru::Vectors
- df = Daru::DataFrame.new({}, opts)
+ if source.all? { |s| s.is_a?(Array) }
+ Daru::DataFrame.new(source.transpose, opts)
+ else # array of Daru::Vectors
+ Daru::DataFrame.new({}, opts).tap do |df|
source.each_with_index do |row, idx|
- df[(index[idx] || idx), :row] = row
+ df[index[idx] || idx, :row] = row
end
end
- else
- raise SizeError, "All vectors must have same length"
end
-
- df
end
# Generates a new dataset, using three vectors
# - Rows
# - Columns
@@ -160,22 +156,20 @@
# a 0 1
# b 1 0
#
# Useful to process outputs from databases
def crosstab_by_assignation rows, columns, values
- raise "Three vectors should be equal size" if
- rows.size != columns.size or rows.size!=values.size
+ raise 'Three vectors should be equal size' if
+ rows.size != columns.size || rows.size!=values.size
cols_values = columns.factors
cols_n = cols_values.size
- h_rows = rows.factors.inject({}) do |a,v|
- a[v] = cols_values.inject({}) do |a1,v1|
+ h_rows = rows.factors.each_with_object({}) do |v, a|
+ a[v] = cols_values.each_with_object({}) do |v1, a1|
a1[v1]=nil
- a1
end
- a
end
values.each_index do |i|
h_rows[rows[i]][columns[i]] = values[i]
end
@@ -248,11 +242,11 @@
index = opts[:index]
clone = opts[:clone] == false ? false : true
@data = []
temp_name = opts[:name]
- @name = temp_name || SecureRandom.uuid
+ @name = temp_name || SecureRandom.uuid
if source.empty?
@vectors = try_create_index vectors
@index = try_create_index index
create_empty_vectors
@@ -264,44 +258,44 @@
equal order size (#{source.size})" if source.size != vectors.size
@index = try_create_index(index || source[0].size)
@vectors = try_create_index(vectors)
- @vectors.each_with_index do |vec,idx|
+ @vectors.each_with_index do |_vec,idx|
@data << Daru::Vector.new(source[idx], index: @index)
end
elsif source.all? { |s| s.is_a?(Daru::Vector) }
hsh = {}
vectors.each_with_index do |name, idx|
hsh[name] = source[idx]
end
initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
else # array of hashes
- if vectors.nil?
- @vectors = Daru::Index.new source[0].keys
- else
- @vectors = Daru::Index.new(
- (vectors + (source[0].keys - vectors)).uniq)
- end
+ @vectors =
+ if vectors.nil?
+ Daru::Index.new source[0].keys
+ else
+ Daru::Index.new((vectors + (source[0].keys - vectors)).uniq)
+ end
@index = Daru::Index.new(index || source.size)
@vectors.each do |name|
v = []
- source.each do |hsh|
- v << (hsh[name] || hsh[name.to_s])
+ source.each do |h|
+ v << (h[name] || h[name.to_s])
end
@data << Daru::Vector.new(v, name: set_name(name), index: @index)
end
end
when Hash
create_vectors_index_with vectors, source
if all_daru_vectors_in_source? source
+ vectors_have_same_index = all_vectors_have_equal_indexes?(source)
if !index.nil?
@index = try_create_index index
- elsif all_vectors_have_equal_indexes?(source)
- vectors_have_same_index = true
+ elsif vectors_have_same_index
@index = source.values[0].index.dup
else
all_indexes = []
source.each_value do |vector|
all_indexes << vector.index.to_a
@@ -318,18 +312,14 @@
# avoids matching indexes of vectors if all the supplied vectors
# have the same index.
if vectors_have_same_index
v = source[vector].dup
else
- v = Daru::Vector.new([], name: vector, index: @index)
+ v = Daru::Vector.new([], name: vector, metadata: source[vector].metadata.dup, index: @index)
@index.each do |idx|
- if source[vector].index.include? idx
- v[idx] = source[vector][idx]
- else
- v[idx] = nil
- end
+ v[idx] = source[vector].index.include?(idx) ? source[vector][idx] : nil
end
end
@data << v
end
else
@@ -337,42 +327,42 @@
end
else
@index = try_create_index(index || source.values[0].size)
@vectors.each do |name|
- @data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
+ meta_opt = source[name].respond_to?(:metadata) ? {metadata: source[name].metadata.dup} : {}
+ @data << Daru::Vector.new(source[name].dup, name: set_name(name), **meta_opt, index: @index)
end
end
end
end
set_size
validate
update
end
- def vector *args
- $stderr.puts "#vector has been deprecated in favour of #[]. Please use that."
+ def vector(*names) # deprecated alias of #[]: warns, then forwards every argument to it
+ $stderr.puts '#vector has been deprecated in favour of #[]. Please use that.'
self[*names]
end
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
# Defaults to *:vector*. Use of this method is not recommended for accessing
- # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
- # df.vector[:vec] for accessing vector with index *:vec*.
+ # rows. Use df.row[:a] for accessing row with index ':a'.
def [](*names)
- if names[-1] == :vector or names[-1] == :row
+ if names[-1] == :vector || names[-1] == :row
axis = names[-1]
names = names[0..-2]
else
axis = :vector
end
if axis == :vector
- access_vector *names
+ access_vector(*names)
elsif axis == :row
- access_row *names
+ access_row(*names)
else
raise IndexError, "Expected axis to be row or vector not #{axis}"
end
end
@@ -431,11 +421,11 @@
def dup vectors_to_dup=nil
vectors_to_dup = @vectors.to_a unless vectors_to_dup
src = []
vectors_to_dup.each do |vec|
- src << @data[@vectors[vec]].to_a.dup
+ src << @data[@vectors[vec]].dup
end
new_order = Daru::Index.new(vectors_to_dup)
Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
end
@@ -452,15 +442,14 @@
#
# +vectors_to_clone+ - Names of vectors to clone. Optional. Will return
# a view of the whole data frame otherwise.
def clone *vectors_to_clone
vectors_to_clone.flatten! unless vectors_to_clone.all? { |a| !a.is_a?(Array) }
- return super if vectors_to_clone.empty?
+ vectors_to_clone = @vectors.to_a if vectors_to_clone.empty?
- h = vectors_to_clone.inject({}) do |hsh, vec|
+ h = vectors_to_clone.each_with_object({}) do |vec, hsh|
hsh[vec] = self[vec]
- hsh
end
Daru::DataFrame.new(h, clone: false)
end
# Returns a 'shallow' copy of DataFrame if missing data is not present,
@@ -474,13 +463,12 @@
end
# Creates a new duplicate dataframe containing only rows
# without a single missing value.
def dup_only_valid vecs=nil
- rows_with_nil = @data.inject([]) do |memo, vector|
+ rows_with_nil = @data.each_with_object([]) do |vector, memo|
memo.concat vector.missing_positions
- memo
end.uniq
row_indexes = @index.to_a
(vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
end
@@ -503,11 +491,11 @@
end
alias_method :each_column, :each_vector
# Iterate over each vector alongwith the name of the vector
- def each_vector_with_index(&block)
+ def each_vector_with_index
return to_enum(:each_vector_with_index) unless block_given?
@vectors.each do |vector|
yield @data[@vectors[vector]], vector
end
@@ -516,21 +504,21 @@
end
alias_method :each_column_with_index, :each_vector_with_index
# Iterate over each row
- def each_row(&block)
+ def each_row
return to_enum(:each_row) unless block_given?
@index.each do |index|
yield access_row(index)
end
self
end
- def each_row_with_index(&block)
+ def each_row_with_index
return to_enum(:each_row_with_index) unless block_given?
@index.each do |index|
yield access_row(index), index
end
@@ -550,11 +538,11 @@
# == Arguments
#
# * +axis+ - The axis to iterate over. Can be :vector (or :column)
# or :row. Default to :vector.
def each axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
each_vector(&block)
elsif axis == :row
each_row(&block)
else
raise ArgumentError, "Unknown axis #{axis}"
@@ -575,11 +563,11 @@
# == Arguments
#
# * +axis+ - The axis to iterate over. Can be :vector (or :column)
# or :row. Default to :vector.
def collect axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
collect_vectors(&block)
elsif axis == :row
collect_rows(&block)
else
raise ArgumentError, "Unknown axis #{axis}"
@@ -601,11 +589,11 @@
# == Arguments
#
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
# Default to :vector.
def map axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
map_vectors(&block)
elsif axis == :row
map_rows(&block)
else
raise ArgumentError, "Unknown axis #{axis}"
@@ -619,11 +607,11 @@
# == Arguments
#
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
# Default to :vector.
def map! axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
map_vectors!(&block)
elsif axis == :row
map_rows!(&block)
end
end
@@ -644,11 +632,11 @@
# == Arguments
#
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
# Default to :vector.
def recode axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
recode_vectors(&block)
elsif axis == :row
recode_rows(&block)
end
end
@@ -680,46 +668,46 @@
#
# df.filter(:row) do |row|
# row[:a] + row[:d] < 100
# end
def filter axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
filter_vectors(&block)
elsif axis == :row
filter_rows(&block)
end
end
- def recode_vectors &block
+ def recode_vectors
block_given? or return to_enum(:recode_vectors)
- df = self.dup
+ df = dup
df.each_vector_with_index do |v, i|
ret = yield v
ret.is_a?(Daru::Vector) or
raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
df[*i] = ret
end
df
end
- def recode_rows &block
+ def recode_rows
block_given? or return to_enum(:recode_rows)
- df = self.dup
+ df = dup
df.each_row_with_index do |r, i|
ret = yield r
ret.is_a?(Daru::Vector) or raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
df.row[i] = ret
end
df
end
# Map each vector and return an Array.
- def map_vectors(&block)
+ def map_vectors
return to_enum(:map_vectors) unless block_given?
arry = []
@data.each do |vec|
arry << yield(vec)
@@ -727,11 +715,11 @@
arry
end
# Destructive form of #map_vectors
- def map_vectors!(&block)
+ def map_vectors!
return to_enum(:map_vectors!) unless block_given?
vectors.dup.each do |n|
v = yield self[n]
v.is_a?(Daru::Vector) or raise TypeError, "Must return a Daru::Vector not #{v.class}"
@@ -740,11 +728,11 @@
self
end
# Map vectors alongwith the index.
- def map_vectors_with_index(&block)
+ def map_vectors_with_index
return to_enum(:map_vectors_with_index) unless block_given?
dt = []
each_vector_with_index do |vector, name|
dt << yield(vector, name)
@@ -752,58 +740,58 @@
dt
end
# Map each row
- def map_rows(&block)
+ def map_rows
return to_enum(:map_rows) unless block_given?
dt = []
each_row do |row|
dt << yield(row)
end
dt
end
- def map_rows_with_index(&block)
+ def map_rows_with_index
return to_enum(:map_rows_with_index) unless block_given?
dt = []
each_row_with_index do |row, index|
dt << yield(row, index)
end
dt
end
- def map_rows!(&block)
+ def map_rows!
return to_enum(:map_rows!) unless block_given?
index.dup.each do |i|
- r = yield self.row[i]
+ r = yield row[i]
r.is_a?(Daru::Vector) or raise TypeError, "Returned object must be Daru::Vector not #{r.class}"
- self.row[i] = r
+ row[i] = r
end
self
end
# Retrieves a Daru::Vector, based on the result of calculation
# performed on each row.
- def collect_rows &block
+ def collect_rows
return to_enum(:collect_rows) unless block_given?
data = []
each_row do |row|
data.push yield(row)
end
Daru::Vector.new(data, index: @index)
end
- def collect_row_with_index &block
+ def collect_row_with_index
return to_enum(:collect_row_with_index) unless block_given?
data = []
each_row_with_index do |row, i|
data.push yield(row, i)
@@ -812,22 +800,22 @@
Daru::Vector.new(data, index: @index)
end
# Retrives a Daru::Vector, based on the result of calculation
# performed on each vector.
- def collect_vectors &block
+ def collect_vectors
return to_enum(:collect_vectors) unless block_given?
data = []
each_vector do |vec|
data.push yield(vec)
end
Daru::Vector.new(data, index: @vectors)
end
- def collect_vector_with_index &block
+ def collect_vector_with_index
return to_enum(:collect_vector_with_index) unless block_given?
data = []
each_vector_with_index do |vec, i|
data.push yield(vec, i)
@@ -850,34 +838,35 @@
}
Matrix.rows(rows)
end
-
# Delete a vector
def delete_vector vector
- if @vectors.include? vector
- @data.delete_at @vectors[vector]
- @vectors = Daru::Index.new @vectors.to_a - [vector]
- else
- raise IndexError, "Vector #{vector} does not exist."
- end
+ raise IndexError, "Vector #{vector} does not exist." unless @vectors.include?(vector)
+ @data.delete_at @vectors[vector]
+ @vectors = Daru::Index.new @vectors.to_a - [vector]
+
self
end
+ # Deletes a list of vectors
+ def delete_vectors *vectors
+ Array(vectors).each { |vec| delete_vector vec }
+
+ self
+ end
+
# Delete a row
def delete_row index
idx = named_index_for index
- if @index.include? idx
- @index = Daru::Index.new(@index.to_a - [idx])
- self.each_vector do |vector|
- vector.delete_at idx
- end
- else
- raise IndexError, "Index #{index} does not exist."
+ raise IndexError, "Index #{index} does not exist." unless @index.include? idx
+ @index = Daru::Index.new(@index.to_a - [idx])
+ each_vector do |vector|
+ vector.delete_at idx
end
set_size
end
@@ -893,11 +882,11 @@
end
ds_boot.update
ds_boot
end
- def keep_row_if &block
+ def keep_row_if
deletion = []
@index.each do |index|
keep_row = yield access_row(index)
@@ -906,11 +895,11 @@
deletion.each { |idx|
delete_row idx
}
end
- def keep_vector_if &block
+ def keep_vector_if
@vectors.each do |vector|
keep_vector = yield @data[@vectors[vector]], vector
delete_vector vector unless keep_vector
end
@@ -921,50 +910,40 @@
d = []
each_row do |row|
d.push(row[vec]) if yield row
end
- Daru::Vector.new(d)
+ Daru::Vector.new(d, metadata: self[vec].metadata.dup)
end
# Iterates over each row and retains it in a new DataFrame if the block returns
# true for that row.
- def filter_rows &block
+ def filter_rows
return to_enum(:filter_rows) unless block_given?
- df = Daru::DataFrame.new({}, order: @vectors.to_a)
- marked = []
+ keep_rows = @index.map { |index| yield access_row(index) }
- @index.each do |index|
- keep_row = yield access_row(index)
- marked << index if keep_row
- end
-
- marked.each do |idx|
- df.row[idx] = self[idx, :row]
- end
-
- df
+ where keep_rows
end
# Iterates over each vector and retains it in a new DataFrame if the block returns
# true for that vector.
def filter_vectors &block
return to_enum(:filter_vectors) unless block_given?
- df = self.dup
- df.keep_vector_if &block
+ df = dup
+ df.keep_vector_if(&block)
df
end
# Test each row with one or more tests. Each test is a Proc with the form
# *Proc.new {|row| row[:age] > 0}*
#
# The function returns an array with all errors.
def verify(*tests)
- if(tests[0].is_a? Symbol)
+ if tests[0].is_a? Symbol
id = tests[0]
tests.shift
else
id = @vectors.first
end
@@ -972,17 +951,16 @@
vr = []
i = 0
each(:row) do |row|
i += 1
tests.each do |test|
- if !test[2].call(row)
- values = ""
- if test[1].size>0
- values = " (" + test[1].collect{ |k| "#{k}=#{row[k]}" }.join(", ") + ")"
- end
- vr.push("#{i} [#{row[id]}]: #{test[0]}#{values}")
+ next if test[2].call(row)
+ values = ''
+ unless test[1].empty?
+ values = ' (' + test[1].collect { |k| "#{k}=#{row[k]}" }.join(', ') + ')'
end
+ vr.push("#{i} [#{row[id]}]: #{test[0]}#{values}")
end
end
vr
end
@@ -1049,11 +1027,11 @@
# TODO: remove next version
alias :vector_missing_values :missing_values_rows
def has_missing_data?
- !!@data.any? { |v| v.has_missing_data? }
+ !!@data.any?(&:has_missing_data?)
end
alias :flawed? :has_missing_data?
# Return a nested hash using vector names as keys and an array constructed of
@@ -1073,23 +1051,23 @@
current = current[root]
end
name = row[tree_keys.last]
if !block
current[name] ||= []
- current[name].push(row.to_hash.delete_if { |key,value| tree_keys.include? key})
+ current[name].push(row.to_h.delete_if { |key,_value| tree_keys.include? key })
else
- current[name] = block.call(row, current,name)
+ current[name] = yield(row, current, name)
end
end
out
end
def vector_count_characters vecs=nil
vecs ||= @vectors.to_a
- collect_row_with_index do |row, i|
+ collect_rows do |row|
vecs.inject(0) do |memo, vec|
memo + (row[vec].nil? ? 0 : row[vec].to_s.size)
end
end
end
@@ -1127,11 +1105,11 @@
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
# df.any?(:row) do |row|
# row[:a] < 3 and row[:b] == 'b'
# end #=> true
def any? axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
@data.any?(&block)
elsif axis == :row
each_row do |row|
return true if yield(row)
end
@@ -1149,11 +1127,11 @@
# df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
# df.all?(:row) do |row|
# row[:a] < 10
# end #=> true
def all? axis=:vector, &block
- if axis == :vector or axis == :column
+ if axis == :vector || axis == :column
@data.all?(&block)
elsif axis == :row
each_row do |row|
return false unless yield(row)
end
@@ -1234,50 +1212,56 @@
# # ["foo", "one", 3]=>[6],
# # ["foo", "three", 8]=>[7],
# # ["foo", "two", 3]=>[2, 4]}
def group_by *vectors
vectors.flatten!
- vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
- has_vector?(v) }
+ vectors.each { |v|
+ raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
+ }
Daru::Core::GroupBy.new(self, vectors)
end
def reindex_vectors new_vectors
- raise ArgumentError, "Must pass the new index of type Index or its "\
- "subclasses, not #{new_index.class}" unless new_vectors.kind_of?(Daru::Index)
+ raise ArgumentError, 'Must pass the new index of type Index or its '\
+ "subclasses, not #{new_vectors.class}" unless new_vectors.is_a?(Daru::Index) # report the class actually passed in
cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
new_vectors.each do |vec|
- if @vectors.include?(vec)
- cl[vec] = self[vec]
- else
- cl[vec] = [nil]*nrows
- end
+ cl[vec] = @vectors.include?(vec) ? self[vec] : [nil]*nrows # vectors absent from this frame become all-nil columns
end
cl
end
# Concatenate another DataFrame along corresponding columns.
- # Very premature implementation. Use with caution.
+ # If columns do not exist in both dataframes, they are filled with nils
def concat other_df
- vectors = []
- @vectors.each do |v|
- vectors << self[v].to_a.dup.concat(other_df[v].to_a)
+ vectors = @vectors.to_a
+ data = []
+
+ vectors.each do |v|
+ other_vec = other_df.vectors.include?(v) ? other_df[v].to_a : [nil] * other_df.size
+ data << self[v].dup.to_a.concat(other_vec)
end
- Daru::DataFrame.new(vectors, order: @vectors)
+ other_df.vectors.each do |v|
+ next if vectors.include?(v)
+ vectors << v
+ data << ([nil] * size).concat(other_df[v].to_a)
+ end
+
+ Daru::DataFrame.new(data, order: vectors)
end
# Set a particular column as the new DF
def set_index new_index, opts={}
- raise ArgumentError, "All elements in new index must be unique." if
+ raise ArgumentError, 'All elements in new index must be unique.' if
@size != self[new_index].uniq.size
self.index = Daru::Index.new(self[new_index].to_a)
- self.delete_vector(new_index) unless opts[:keep]
+ delete_vector(new_index) unless opts[:keep]
self
end
# Change the index of the DataFrame and preserve the labels of the previous
@@ -1301,20 +1285,16 @@
# # b 2 22
# # 0 nil nil
# # a 1 11
# # g nil nil
def reindex new_index
- raise ArgumentError, "Must pass the new index of type Index or its "\
- "subclasses, not #{new_index.class}" unless new_index.kind_of?(Daru::Index)
+ raise ArgumentError, 'Must pass the new index of type Index or its '\
+ "subclasses, not #{new_index.class}" unless new_index.is_a?(Daru::Index)
cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
new_index.each do |idx|
- if @index.include?(idx)
- cl.row[idx] = self.row[idx]
- else
- cl.row[idx] = [nil]*ncols
- end
+ cl.row[idx] = @index.include?(idx) ? row[idx] : [nil]*ncols
end
cl
end
@@ -1328,11 +1308,11 @@
#
# df.index = Daru::Index.new(['a','b','c','d'])
# df.index.to_a #=> ['a','b','c','d']
# df.row['a'].to_a #=> [1,11]
def index= idx
- @data.each { |vec| vec.index = idx}
+ @data.each { |vec| vec.index = idx }
@index = idx
self
end
@@ -1345,114 +1325,214 @@
# df.vectors.to_a #=> [:a, :b, :c]
#
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
# df.vectors.to_a #=> [:foo, :bar, :baz]
def vectors= idx
- raise ArgumentError, "Can only reindex with Index and its subclasses" unless
- index.kind_of?(Daru::Index)
+ raise ArgumentError, 'Can only reindex with Index and its subclasses' unless
+ idx.is_a?(Daru::Index) # validate the argument, not the frame's current row index
raise ArgumentError, "Specified index length #{idx.size} not equal to"\
"dataframe size #{ncols}" if idx.size != ncols
@vectors = idx
self
end
+ # Renames the vectors
+ #
+ # == Arguments
+ #
+ # * name_map - A hash where the keys are the existing vector names and
+ # the values are the new names. If a vector is renamed
+ # to a vector name that is already in use, the existing
+ # one is overwritten.
+ #
+ # == Usage
+ #
+ # df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] })
+ # df.rename_vectors :a => :alpha, :c => :gamma
+ # df.vectors.to_a #=> [:alpha, :b, :gamma]
+ def rename_vectors name_map
+ moves = name_map.select { |k,v| k != v && vectors.include?(k) } # only real renames whose source vector exists
+ delete_vectors(*((moves.values & vectors.to_a) - moves.keys)) # drop overwritten targets, but keep vectors that are themselves renamed
+
+ new_names = vectors.to_a.map { |v| name_map[v] || v }
+ self.vectors = Daru::Index.new new_names
+ end
+
# Return the indexes of all the numeric vectors. Will include vectors with nils
# alongwith numbers.
def numeric_vectors
numerics = []
each_vector_with_index do |vec, i|
- numerics << i if(vec.type == :numeric)
+ numerics << i if vec.type == :numeric
end
numerics
end
def numeric_vector_names
numerics = []
@vectors.each do |v|
- numerics << v if (self[v].type == :numeric)
+ numerics << v if self[v].type == :numeric
end
numerics
end
# Return a DataFrame of only the numerical Vectors. If clone: false
# is specified as option, only a *view* of the Vectors will be
# returned. Defaults to clone: true.
def only_numerics opts={}
cln = opts[:clone] == false ? false : true
nv = numeric_vectors
- arry = nv.inject([]) do |arr, v|
+ arry = nv.each_with_object([]) do |v, arr|
arr << self[v]
- arr
end
order = Index.new(nv)
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
end
# Generate a summary of this DataFrame with ReportBuilder.
- def summary(method = :to_text)
+ def summary(method=:to_text)
ReportBuilder.new(no_title: true).add(self).send(method)
end
def report_building(b) # :nodoc: #
- b.section(:name=>@name) do |g|
+ b.section(name: @name) do |g|
g.text "Number of rows: #{nrows}"
@vectors.each do |v|
g.text "Element:[#{v}]"
g.parse_element(self[v])
end
end
end
- # Sorts a dataframe (ascending/descending)according to the given sequence of
- # vectors, using the attributes provided in the blocks.
+ # Sorts a dataframe (ascending/descending) in the given priority sequence of
+ # vectors, with or without a block.
#
# @param order [Array] The order of vector names in which the DataFrame
# should be sorted.
# @param [Hash] opts The options to sort with.
# @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
# or descending order. Specify Array corresponding to *order* for multiple
# sort orders.
- # @option opts [Hash] :by ({|a,b| a <=> b}) Specify attributes of objects to
+ # @option opts [Hash] :by (lambda{|a| a }) Specify attributes of objects to
# to be used for sorting, for each vector name in *order* as a hash of
- # vector name and lambda pairs. In case a lambda for a vector is not
+ # vector name and lambda expressions. In case a lambda for a vector is not
# specified, the default will be used.
+ # @option opts [TrueClass,FalseClass,Array] :handle_nils (false) Handle nils
+ # automatically or not when a block is provided.
+ # If set to True, nils will appear at top after sorting.
#
- # == Usage
+ # @example Sort a dataframe with a vector sequence.
#
- # df = Daru::DataFrame.new({a: [-3,2,-1,4], b: [4,3,2,1]})
#
- # #<Daru::DataFrame:140630680 @name = 04e00197-f8d5-4161-bca2-93266bfabc6f @size = 4>
- # # a b
- # # 0 -3 4
- # # 1 2 3
- # # 2 -1 2
- # # 3 4 1
- # df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
+ # df = Daru::DataFrame.new({a: [1,2,1,2,3], b: [5,4,3,2,1]})
+ #
+ # df.sort [:a, :b]
+ # # =>
+ # # <Daru::DataFrame:30604000 @name = d6a9294e-2c09-418f-b646-aa9244653444 @size = 5>
+ # # a b
+ # # 2 1 3
+ # # 0 1 5
+ # # 3 2 2
+ # # 1 2 4
+ # # 4 3 1
+ #
+ # @example Sort a dataframe without a block. Here nils will be handled automatically.
+ #
+ # df = Daru::DataFrame.new({a: [-3,nil,-1,nil,5], b: [4,3,2,1,4]})
+ #
+ # df.sort([:a])
+ # # =>
+ # # <Daru::DataFrame:14810920 @name = c07fb5c7-2201-458d-b679-6a1f7ebfe49f @size = 5>
+ # # a b
+ # # 1 nil 3
+ # # 3 nil 1
+ # # 0 -3 4
+ # # 2 -1 2
+ # # 4 5 4
+ #
+ # @example Sort a dataframe with a block with nils handled automatically.
+ #
+ # df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
+ #
+ # df.sort [:b], by: {b: lambda { |a| a.length } }
+ # # NoMethodError: undefined method `length' for nil:NilClass
+ # # from (pry):8:in `block in __pry__'
+ #
+ # df.sort [:b], by: {b: lambda { |a| a.length } }, handle_nils: true
+ #
+ # # =>
+ # # <Daru::DataFrame:28469540 @name = 5f986508-556f-468b-be0c-88cc3534445c @size = 6>
+ # # a b
+ # # 2 1 nil
+ # # 5 1 nil
+ # # 4 -1 x
+ # # 1 -1 aa
+ # # 0 nil aaa
+ # # 3 nil baaa
+ #
+ # @example Sort a dataframe with a block with nils handled manually.
+ #
+ # df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
+ #
+ # # To print nils at the bottom one can use lambda { |a| (a.nil?)?[1]:[0,a.length] }
+ # df.sort [:b], by: {b: lambda { |a| (a.nil?)?[1]:[0,a.length] } }, handle_nils: true
+ #
+ # # =>
+ # #<Daru::DataFrame:22214180 @name = cd7703c7-1dca-4560-840b-5ea51a852ef9 @size = 6>
+ # # a b
+ # # 4 -1 x
+ # # 1 -1 aa
+ # # 0 nil aaa
+ # # 3 nil baaa
+ # # 2 1 nil
+ # # 5 1 nil
+
def sort! vector_order, opts={}
- raise ArgumentError, "Required atleast one vector name" if vector_order.size < 1
+ raise ArgumentError, 'Required atleast one vector name' if vector_order.empty?
opts = {
ascending: true,
- type: :quick_sort,
+ handle_nils: false,
by: {}
}.merge(opts)
- opts[:by] = create_logic_blocks vector_order, opts[:by]
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
- idx = @index.to_a
- send(opts[:type], vector_order, idx, opts[:by], opts[:ascending])
- self.index = Daru::Index.new(idx)
+ opts[:handle_nils] = handle_nils_array vector_order, opts[:handle_nils]
+ blocks = create_logic_blocks vector_order, opts[:by], opts[:ascending]
+ block = lambda do |r1, r2|
+ # Build left and right array to compare two rows
+ left = build_array_from_blocks vector_order, opts, blocks, r1, r2
+ right = build_array_from_blocks vector_order, opts, blocks, r2, r1
+
+ # Resolve conflict by Index if all attributes are same
+ left << r1
+ right << r2
+ left <=> right
+ end
+
+ idx = (0..@index.size-1).sort(&block)
+
+ old_index = @index.to_a
+ self.index = Daru::Index.new(idx.map { |i| old_index[i] })
+
+ vectors.each do |v|
+ @data[@vectors[v]] = Daru::Vector.new(
+ idx.map { |i| @data[@vectors[v]].data[i] },
+ name: self[v].name, metadata: self[v].metadata.dup, index: index
+ )
+ end
+
self
end
# Non-destructive version of #sort!
def sort vector_order, opts={}
- self.dup.sort! vector_order, opts
+ dup.sort! vector_order, opts
end
# Pivots a data frame on specified vectors and applies an aggregate function
# to quickly generate a summary.
#
@@ -1487,29 +1567,31 @@
# # [:e, :one] [:e, :two]
# # [:bar] 18 26
# # [:foo] 10 12
def pivot_table opts={}
raise ArgumentError,
- "Specify grouping index" if !opts[:index] or opts[:index].empty?
+ 'Specify grouping index' if !opts[:index] || opts[:index].empty?
index = opts[:index]
vectors = opts[:vectors] || []
aggregate_function = opts[:agg] || :mean
values =
- if opts[:values].is_a?(Symbol)
- [opts[:values]]
- elsif opts[:values].is_a?(Array)
- opts[:values]
- else # nil
- (@vectors.to_a - (index | vectors)) & numeric_vector_names
- end
+ if opts[:values].is_a?(Symbol)
+ [opts[:values]]
+ elsif opts[:values].is_a?(Array)
+ opts[:values]
+ else # nil
+ (@vectors.to_a - (index | vectors)) & numeric_vector_names
+ end
- raise IndexError, "No numeric vectors to aggregate" if values.empty?
+ raise IndexError, 'No numeric vectors to aggregate' if values.empty?
- grouped = group_by(index)
+ grouped = group_by(index)
- unless vectors.empty?
+ if vectors.empty?
+ grouped.send(aggregate_function)
+ else
super_hash = {}
values.each do |value|
grouped.groups.each do |group_name, row_numbers|
super_hash[group_name] ||= {}
@@ -1546,12 +1628,10 @@
# pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
pivoted_dataframe[vector_index][row_index] = val
end
end
return pivoted_dataframe
- else
- grouped.send(aggregate_function)
end
end
# Merge vectors from two DataFrames. In case of name collision,
# the vectors names are changed to x_1, x_2 ....
@@ -1559,12 +1639,12 @@
# @return {Daru::DataFrame}
def merge other_df
raise "Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}" unless nrows == other_df.nrows
new_fields = (@vectors.to_a + other_df.vectors.to_a)
- .recode_repeated
- .map(&:to_sym)
+ .recode_repeated
+ .map(&:to_sym)
df_new = DataFrame.new({}, order: new_fields)
(0...nrows).to_a.each do |i|
row = self.row[i].to_a + other_df.row[i].to_a
df_new.add_row(row)
@@ -1601,11 +1681,10 @@
# # 1 3 Ninja 4
def join(other_df,opts={})
Daru::Core::Merge.join(self, other_df, opts)
end
-
# Creates a new dataset for one to many relations
# on a dataset, based on pattern of field names.
#
# for example, you have a survey for number of children
# with this structure:
@@ -1630,46 +1709,45 @@
# # ["green", "2", 15],
# # ["orange", "2", 30],
# # ["white", "2", 20]
# # ]
def one_to_many(parent_fields, pattern)
- re = Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
+ re = Regexp.new pattern.gsub('%v','(.+?)').gsub('%n','(\\d+?)')
ds_vars = parent_fields.dup
vars = []
max_n = 0
- h = parent_fields.inject({}) { |a,v|
+ h = parent_fields.each_with_object({}) { |v, a|
a[v] = Daru::Vector.new([])
- a
}
# Adding _row_id
h['_col_id'] = Daru::Vector.new([])
ds_vars.push('_col_id')
@vectors.each do |f|
- if f =~ re
- if !vars.include? $1
- vars.push($1)
- h[$1] = Daru::Vector.new([])
- end
- max_n = $2.to_i if max_n < $2.to_i
+ next unless f =~ re
+ unless vars.include? $1
+ vars.push($1)
+ h[$1] = Daru::Vector.new([])
end
+
+ max_n = $2.to_i if max_n < $2.to_i
end
ds = DataFrame.new(h, order: ds_vars+vars)
each_row do |row|
row_out = {}
parent_fields.each do |f|
row_out[f] = row[f]
end
max_n.times do |n1|
- n = n1+1
+ n = n1+1
any_data = false
vars.each do |v|
- data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
+ data = row[pattern.gsub('%v',v.to_s).gsub('%n',n.to_s)]
row_out[v] = data
- any_data = true if !data.nil?
+ any_data = true unless data.nil?
end
if any_data
row_out['_col_id'] = n
ds.add_row(row_out)
@@ -1683,11 +1761,11 @@
def add_vectors_by_split_recode(name_, join='-', sep=Daru::SPLIT_TOKEN)
split = self[name_].split_by_separator(sep)
i = 1
split.each { |k,v|
new_field = name_.to_s + join + i.to_s
- v.rename name_.to_s + ":" + k.to_s
+ v.rename name_.to_s + ':' + k.to_s
self[new_field.to_sym] = v
i += 1
}
end
@@ -1705,15 +1783,15 @@
# :name => Daru::Vector.new(%w{Alex Peter Susan Mary John})
# })
# ds.create_sql('names')
# #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
#
- def create_sql(table,charset="UTF8")
+ def create_sql(table,charset='UTF8')
sql = "CREATE TABLE #{table} ("
- fields = self.vectors.to_a.collect do |f|
+ fields = vectors.to_a.collect do |f|
v = self[f]
- f.to_s + " " + v.db_type
+ f.to_s + ' ' + v.db_type
end
sql + fields.join(",\n ")+") CHARACTER SET=#{charset};"
end
@@ -1722,18 +1800,18 @@
numerics_as_arrays = []
numeric_vectors.each do |n|
numerics_as_arrays << self[n].to_a
end
- GSL::Matrix.alloc *numerics_as_arrays.transpose
+ GSL::Matrix.alloc(*numerics_as_arrays.transpose)
end
# Convert all vectors of type *:numeric* into a Matrix.
def to_matrix
numerics_as_arrays = []
each_vector do |vector|
- numerics_as_arrays << vector.to_a if(vector.type == :numeric)
+ numerics_as_arrays << vector.to_a if vector.type == :numeric
end
Matrix.columns numerics_as_arrays
end
@@ -1744,12 +1822,12 @@
# Convert all vectors of type *:numeric* and not containing nils into an NMatrix.
def to_nmatrix
numerics_as_arrays = []
each_vector do |vector|
- numerics_as_arrays << vector.to_a if(vector.type == :numeric and
- vector.missing_positions.size == 0)
+ numerics_as_arrays << vector.to_a if vector.type == :numeric &&
+ vector.missing_positions.empty?
end
numerics_as_arrays.transpose.to_nm
end
@@ -1758,75 +1836,75 @@
# the array of hashes while the 1th index contains the indexes of each row
# of the dataframe. Each element in the index array corresponds to its row
# in the array of hashes, which has the same index.
def to_a
arry = [[],[]]
- self.each_row do |row|
- arry[0] << row.to_hash
+ each_row do |row|
+ arry[0] << row.to_h
end
arry[1] = @index.to_a
arry
end
# Convert to json. If no_index is false then the index will NOT be included
# in the JSON thus created.
def to_json no_index=true
if no_index
- self.to_a[0].to_json
+ to_a[0].to_json
else
- self.to_a.to_json
+ to_a.to_json
end
end
- # Converts DataFrame to a hash with keys as vector names and values as
+ # Converts DataFrame to a hash (explicit) with keys as vector names and values as
# the corresponding vectors.
- def to_hash
+ def to_h
hsh = {}
@vectors.each_with_index do |vec_name, idx|
hsh[vec_name] = @data[idx]
end
hsh
end
# Convert to html for IRuby.
def to_html threshold=30
- html = "<table>" +
- "<tr>" +
- "<th colspan=\"#{@vectors.size+1}\">" +
- "Daru::DataFrame:#{self.object_id} " + " rows: #{nrows} " + " cols: #{ncols}"
- "</th>" +
- "</tr>"
+ html = '<table>' \
+ '<tr>' \
+ "<th colspan=\"#{@vectors.size+1}\">" \
+ "Daru::DataFrame:#{object_id} " + " rows: #{nrows} " + " cols: #{ncols}" \
+ '</th>' \
+ '</tr>'
html +='<tr><th></th>'
@vectors.each { |vector| html += '<th>' + vector.to_s + '</th>' }
html += '</tr>'
@index.each_with_index do |index, num|
html += '<tr>'
html += '<td>' + index.to_s + '</td>'
- self.row[index].each do |element|
+ row[index].each do |element|
html += '<td>' + element.to_s + '</td>'
end
html += '</tr>'
- if num > threshold
- html += '<tr>'
- (@vectors.size + 1).times { html += '<td>...</td>' }
- html += '</tr>'
+ next if num <= threshold
- last_index = @index.to_a.last
- last_row = self.row[last_index]
- html += '<tr>'
- html += "<td>" + last_index.to_s + "</td>"
- (0..(ncols - 1)).to_a.each do |i|
- html += '<td>' + last_row[i].to_s + '</td>'
- end
- html += '</tr>'
- break
+ html += '<tr>'
+ (@vectors.size + 1).times { html += '<td>...</td>' }
+ html += '</tr>'
+
+ last_index = @index.to_a.last
+ last_row = row[last_index]
+ html += '<tr>'
+ html += '<td>' + last_index.to_s + '</td>'
+ (0..(ncols - 1)).to_a.each do |i|
+ html += '<td>' + last_row[i].to_s + '</td>'
end
+ html += '</tr>'
+ break
end
html += '</table>'
html
end
@@ -1839,11 +1917,11 @@
# after assingment/deletion etc. are complete. This is provided so that
# time is not wasted in creating the metadata for the vector each time
# assignment/deletion of elements is done. Updating data this way is called
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
def update
- @data.each { |v| v.update } if Daru.lazy_update
+ @data.each(&:update) if Daru.lazy_update
end
# Rename the DataFrame.
def rename new_name
@name = new_name
@@ -1888,23 +1966,22 @@
# ds.write_sql(dbh,"test")
def write_sql dbh, table
Daru::IO.dataframe_write_sql self, dbh, table
end
-
# Use marshalling to save dataframe to a file.
def save filename
Daru::IO.save self, filename
end
- def _dump depth
- Marshal.dump({
+ def _dump(_depth)
+ Marshal.dump(
data: @data,
index: @index.to_a,
order: @vectors.to_a,
name: @name
- })
+ )
end
def self._load data
h = Marshal.load data
Daru::DataFrame.new(h[:data],
@@ -1937,33 +2014,33 @@
# Pretty print in a nice table format for the command line (irb/pry/iruby)
def inspect spacing=10, threshold=15
longest = [@name.to_s.size,
(@vectors.map(&:to_s).map(&:size).max || 0),
(@index .map(&:to_s).map(&:size).max || 0),
- (@data .map{ |v| v.map(&:to_s).map(&:size).max}.max || 0)].max
+ (@data .map { |v| v.map(&:to_s).map(&:size).max }.max || 0)].max
name = @name || 'nil'
- content = ""
+ content = ''
longest = spacing if longest > spacing
formatter = "\n"
(@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
- content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
- name.to_s + " @size = " + @size.to_s + ">"
- content += sprintf formatter, "" , *@vectors.map(&:to_s)
+ content += "\n#<" + self.class.to_s + ':' + object_id.to_s + ' @name = ' +
+ name.to_s + ' @size = ' + @size.to_s + '>'
+ content += formatter % ['', *@vectors.map(&:to_s)]
row_num = 1
- self.each_row_with_index do |row, index|
- content += sprintf formatter, index.to_s, *row.to_hash.values.map { |e| (e || 'nil').to_s }
+ each_row_with_index do |row, index|
+ content += formatter % [index.to_s, *row.to_h.values.map { |e| (e || 'nil').to_s }]
row_num += 1
- if row_num > threshold
- dots = []
+ next if row_num <= threshold
- (@vectors.size + 1).times { dots << "..." }
- content += sprintf formatter, *dots
- break
- end
+ dots = []
+
+ (@vectors.size + 1).times { dots << '...' }
+ content += formatter % dots
+ break
end
content += "\n"
content
end
@@ -1972,139 +2049,99 @@
def where bool_array
Daru::Core::Query.df_where self, bool_array
end
def == other
- self.class == other.class and
- @size == other.size and
- @index == other.index and
- @vectors == other.vectors and
- @vectors.to_a.all? { |v| self[v] == other[v] }
+ self.class == other.class &&
+ @size == other.size &&
+ @index == other.index &&
+ @vectors == other.vectors &&
+ @vectors.to_a.all? { |v| self[v] == other[v] }
end
def method_missing(name, *args, &block)
- if md = name.match(/(.+)\=/)
- insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
- elsif self.has_vector? name
+ if name =~ /(.+)\=/
+ insert_or_modify_vector name[/(.+)\=/].delete('=').to_sym, args[0]
+ elsif has_vector? name
self[name]
else
super(name, *args, &block)
end
end
- private
+ private
def possibly_multi_index? index
if @index.is_a?(MultiIndex)
Daru::MultiIndex.from_tuples(index)
else
Daru::Index.new(index)
end
end
- def quick_sort vector_order, index, by, ascending
- recursive_quick_sort vector_order, index, by, ascending, 0, @size-1
- end
-
- # == Arguments
- #
- # vector_order -
- # index -
- # by -
- # ascending -
- # left_lower -
- # right_upper -
- def recursive_quick_sort vector_order, index, by, ascending, left_lower, right_upper
- if left_lower < right_upper
- left_upper, right_lower = partition(vector_order, index, by, ascending, left_lower, right_upper)
- if left_upper - left_lower < right_upper - right_lower
- recursive_quick_sort(vector_order, index, by, ascending, left_lower, left_upper)
- recursive_quick_sort(vector_order, index, by, ascending, right_lower, right_upper)
- else
- recursive_quick_sort(vector_order, index, by, ascending, right_lower, right_upper)
- recursive_quick_sort(vector_order, index, by, ascending, left_lower, left_upper)
- end
+ def create_logic_blocks vector_order, _by, ascending
+ # Create blocks to handle nils
+ blocks = {}
+ universal_block_ascending = ->(a) { [a.nil? ? 0 : 1, a] }
+ universal_block_decending = ->(a) { [a.nil? ? 1 : 0, a] }
+ vector_order.each_with_index do |vector, i|
+ blocks[vector] =
+ if ascending[i]
+ universal_block_ascending
+ else
+ universal_block_decending
+ end
end
+
+ blocks
end
- def partition vector_order, index, by, ascending, left_lower, right_upper
- mindex = (left_lower + right_upper) / 2
- mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
- i = left_lower
- j = right_upper
- descending = ascending.map { |a| !a }
+ def build_array_from_blocks vector_order, opts, blocks, r1, r2
+ # Create an array to be used for comparison of two rows in sorting
+ vector_order.map.each_with_index do |v, i|
+ value = if opts[:ascending][i]
+ @data[@vectors[v]].data[r1]
+ else
+ @data[@vectors[v]].data[r2]
+ end
- i += 1 while(keep?(i, mvalues, vector_order, ascending , by, 0))
- j -= 1 while(keep?(j, mvalues, vector_order, descending, by, 0))
+ if opts[:by][v] && !opts[:handle_nils][i]
+ # Block given and nils handled manually
+ value = opts[:by][v].call value
- while i < j - 1
- @data.each do |vector|
- vector[i], vector[j] = vector[j], vector[i]
- end
- index[i], index[j] = index[j], index[i]
- i += 1
- j -= 1
+ elsif opts[:by][v] && opts[:handle_nils][i]
+ # Block given and nils handled automatically
+ value = opts[:by][v].call value rescue nil
+ blocks[v].call value
- i += 1 while(keep?(i, mvalues, vector_order, ascending , by,0))
- j -= 1 while(keep?(j, mvalues, vector_order, descending, by,0))
- end
-
- if i <= j
- if i < j
- @data.each do |vector|
- vector[i], vector[j] = vector[j], vector[i]
- end
- index[i], index[j] = index[j], index[i]
+ else
+ # Block not given and nils handled automatically
+ blocks[v].call value
end
- i += 1
- j -= 1
end
-
- [j,i]
end
- def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
- vector_name = vector_order[vector_order_index]
- if vector_name
- vec = self[vector_name]
- eval = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
-
- if sort_order[vector_order_index] # sort in ascending order
- return false if eval == 1
- return true if eval == -1
- if eval == 0
- keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
- end
- else # sort in descending order
- return false if eval == -1
- return true if eval == 1
- if eval == 0
- keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
- end
- end
- end
- end
-
- def create_logic_blocks vector_order, by={}
- universal_block = lambda { |a,b| a <=> b }
- vector_order.each do |vector|
- by[vector] ||= universal_block
- end
-
- by
- end
-
def sort_order_array vector_order, ascending
- if ascending.is_a?(Array)
- raise ArgumentError, "Specify same number of vector names and sort orders" if
+ if ascending.is_a? Array
+ raise ArgumentError, 'Specify same number of vector names and sort orders' if
vector_order.size != ascending.size
return ascending
else
Array.new(vector_order.size, ascending)
end
end
+ def handle_nils_array vector_order, handle_nils
+ if handle_nils.is_a? Array
+ raise ArgumentError, 'Specify same number of vector names and handle nils' if
+ vector_order.size != handle_nils.size
+ return handle_nils
+ else
+ Array.new(vector_order.size, handle_nils)
+ end
+ end
+
def vectors_index_for location
if @vectors.include?(location)
@vectors[location]
elsif location[0].is_a?(Integer)
location[0]
@@ -2116,63 +2153,58 @@
return dup(@vectors[location]) if location.is_a?(Range)
if @vectors.is_a?(MultiIndex)
pos = @vectors[names]
- if pos.is_a?(Integer)
- return @data[pos]
- else # MultiIndex
- new_vectors = pos.map do |tuple|
- @data[@vectors[tuple]]
- end
+ return @data[pos] if pos.is_a?(Integer)
- if !location.is_a?(Range) and names.size < @vectors.width
- pos = pos.drop_left_level names.size
- end
+ # MultiIndex
+ new_vectors = pos.map do |tuple|
+ @data[@vectors[tuple]]
+ end
- Daru::DataFrame.new(
- new_vectors, index: @index, order: pos)
+ if !location.is_a?(Range) && names.size < @vectors.width
+ pos = pos.drop_left_level names.size
end
+
+ Daru::DataFrame.new(new_vectors, index: @index, order: pos)
else
unless names[1]
pos = @vectors[location]
- if pos.is_a?(Numeric)
- return @data[pos]
- else
- names = pos
- end
+ return @data[pos] if pos.is_a?(Numeric)
+
+ names = pos
end
- new_vcs = []
+ new_vectors = {}
names.each do |name|
- new_vcs << @data[@vectors[name]].to_a
+ new_vectors[name] = @data[@vectors[name]]
end
order = names.is_a?(Array) ? Daru::Index.new(names) : names
- Daru::DataFrame.new(new_vcs, order: order,
- index: @index, name: @name)
+ Daru::DataFrame.new(new_vectors, order: order,
+ index: @index, name: @name)
end
end
def access_row *names
location = names[0]
if @index.is_a?(MultiIndex)
pos = @index[names]
if pos.is_a?(Integer)
return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
- else
- new_rows = pos.map { |tuple| populate_row_for(tuple) }
+ end
- if !location.is_a?(Range) and names.size < @index.width
- pos = pos.drop_left_level names.size
- end
+ new_rows = pos.map { |tuple| populate_row_for(tuple) }
- Daru::DataFrame.rows(
- new_rows, order: @vectors, name: @name, index: pos)
+ if !location.is_a?(Range) && names.size < @index.width
+ pos = pos.drop_left_level names.size
end
+
+ Daru::DataFrame.rows(new_rows, order: @vectors, name: @name, index: pos)
else
if names[1].nil?
names = @index[location]
if names.is_a?(Numeric)
row = []
@@ -2187,11 +2219,11 @@
rows = []
names.each do |name|
rows << self.row[name].to_a
end
- Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
+ Daru::DataFrame.rows rows, index: names,name: @name, order: @vectors
end
end
def populate_row_for pos
@data.map do |vector|
@@ -2199,88 +2231,93 @@
end
end
def insert_or_modify_vector name, vector
name = name[0] unless @vectors.is_a?(MultiIndex)
- v = nil
+ vec = nil
if @index.empty?
- v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
- @index = v.index
- assign_or_add_vector name, v
+ vec = if vector.is_a?(Daru::Vector)
+ vector
+ else
+ Daru::Vector.new(vector.to_a, name: set_name(name))
+ end
+
+ @index = vec.index
+ assign_or_add_vector name, vec
set_size
@data.map! do |v|
- if v.size == 0
- Daru::Vector.new([nil]*@size, name: set_name(name), index: @index)
+ if v.empty?
+ Daru::Vector.new([nil]*@size, name: set_name(name), metadata: v.metadata, index: @index)
else
v
end
end
else
if vector.is_a?(Daru::Vector)
if vector.index == @index # so that index-by-index assignment is avoided when possible.
- v = vector.dup
+ vec = vector.dup
else
- v = Daru::Vector.new [], name: set_name(name), index: @index
+ vec = Daru::Vector.new [], name: set_name(name), metadata: vector.metadata.dup, index: @index
@index.each do |idx|
- if vector.index.include? idx
- v[idx] = vector[idx]
- else
- v[idx] = nil
- end
+ vec[idx] = vector.index.include?(idx) ? vector[idx] : nil
end
end
else
raise SizeError,
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
@size != vector.size
- v = Daru::Vector.new(vector, name: set_name(name), index: @index)
+ vec = Daru::Vector.new(vector, name: set_name(name), index: @index)
end
- assign_or_add_vector name, v
+ assign_or_add_vector name, vec
end
end
def assign_or_add_vector name, v
- #FIXME: fix this jugaad. need to make changes in Indexing itself.
- pos = @vectors[name]
+ # FIXME: fix this jugaad. need to make changes in Indexing itself.
+ begin
+ pos = @vectors[name]
+ rescue IndexError
+ pos = name
+ end
- if !pos.kind_of?(Daru::Index) and pos == name and
- (@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
+ if !pos.is_a?(Daru::Index) && pos == name &&
+ (@vectors.include?(name) || (pos.is_a?(Integer) && pos < @data.size))
@data[pos] = v
- elsif pos.kind_of?(Daru::Index)
+ elsif pos.is_a?(Daru::Index)
pos.each do |p|
@data[@vectors[p]] = v
end
else
- @vectors = @vectors | [name] if !@vectors.include?(name)
+ @vectors |= [name] unless @vectors.include?(name)
@data[@vectors[name]] = v
end
end
def insert_or_modify_row name, vector
if index.is_a?(MultiIndex)
# TODO
else
name = name[0]
- v =
- if vector.is_a?(Daru::Vector)
- vector
- else
- Daru::Vector.new(vector, name: set_name(name), index: @vectors)
- end
+ vec =
+ if vector.is_a?(Daru::Vector)
+ vector
+ else
+ Daru::Vector.new(vector, name: set_name(name), index: @vectors)
+ end
if @index.include? name
- self.each_vector_with_index do |vector,i|
- vector[name] = v.index.include?(i) ? v[i] : nil
+ each_vector_with_index do |v,i|
+ v[name] = vec.index.include?(i) ? vec[i] : nil
end
else
- @index = @index | [name]
- self.each_vector_with_index do |vector,i|
- vector.concat((v.index.include?(i) ? v[i] : nil), name)
+ @index |= [name]
+ each_vector_with_index do |v,i|
+ v.concat((vec.index.include?(i) ? vec[i] : nil), name)
end
end
set_size
end
@@ -2292,19 +2329,19 @@
end
end
def validate_labels
raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
- @vectors and @vectors.size != @data.size
+ @vectors && @vectors.size != @data.size
- raise IndexError, "Expected number of indexes same as number of rows" if
- @index and @data[0] and @index.size != @data[0].size
+ raise IndexError, 'Expected number of indexes same as number of rows' if
+ @index && @data[0] && @index.size != @data[0].size
end
def validate_vector_sizes
@data.each do |vector|
- raise IndexError, "Expected vectors with equal length" if vector.size != @size
+ raise IndexError, 'Expected vectors with equal length' if vector.size != @size
end
end
def validate
validate_labels
@@ -2330,18 +2367,18 @@
raise IndexError, "Specified index #{index} does not exist."
end
end
def create_vectors_index_with vectors, source
- vectors = source.keys.sort_by { |a| a.to_s } if vectors.nil?
+ vectors = source.keys.sort_by(&:to_s) if vectors.nil?
@vectors =
- unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
- Daru::Index.new((vectors + (source.keys - vectors)).uniq)
- else
- vectors
- end
+ if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
+ vectors
+ else
+ Daru::Index.new((vectors + (source.keys - vectors)).uniq)
+ end
end
def all_vectors_have_equal_indexes? source
idx = source.values[0].index
@@ -2349,27 +2386,27 @@
idx == vector.index
end
end
def try_create_index index
- index.kind_of?(Index) ? index : Daru::Index.new(index)
+ index.is_a?(Index) ? index : Daru::Index.new(index)
end
- def set_name potential_name
+ def set_name potential_name # rubocop:disable Style/AccessorMethodName
potential_name.is_a?(Array) ? potential_name.join : potential_name
end
def symbolize arry
symbolized_arry =
- if arry.all? { |e| e.is_a?(Array) }
- arry.map do |sub_arry|
- sub_arry.map do |e|
- e.is_a?(Numeric) ? e : e.to_sym
+ if arry.all? { |e| e.is_a?(Array) }
+ arry.map do |sub_arry|
+ sub_arry.map do |e|
+ e.is_a?(Numeric) ? e : e.to_sym
+ end
end
+ else
+ arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
end
- else
- arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
- end
symbolized_arry
end
end
end