lib/daru/vector.rb in daru-0.1.2 vs lib/daru/vector.rb in daru-0.1.3
- old
+ new
@@ -1,14 +1,12 @@
-$:.unshift File.dirname(__FILE__)
+require 'daru/maths/arithmetic/vector.rb'
+require 'daru/maths/statistics/vector.rb'
+require 'daru/plotting/vector.rb'
+require 'daru/accessors/array_wrapper.rb'
+require 'daru/accessors/nmatrix_wrapper.rb'
+require 'daru/accessors/gsl_wrapper.rb'
-require 'maths/arithmetic/vector.rb'
-require 'maths/statistics/vector.rb'
-require 'plotting/vector.rb'
-require 'accessors/array_wrapper.rb'
-require 'accessors/nmatrix_wrapper.rb'
-require 'accessors/gsl_wrapper.rb'
-
module Daru
class Vector
include Enumerable
include Daru::Maths::Arithmetic::Vector
include Daru::Maths::Statistics::Vector
@@ -26,14 +24,14 @@
@index.each(&block)
self
end
- def each_with_index(&block)
+ def each_with_index
return to_enum(:each_with_index) unless block_given?
- @index.each { |i| yield(self[i], i) }
+ @index.each { |i| yield(self[i], i) }
self
end
def map!(&block)
return to_enum(:map!) unless block_given?
@@ -57,10 +55,14 @@
# An Array of the positions in the vector that are being treated as 'missing'.
attr_reader :missing_positions
# Store a hash of labels for values. Supplementary only. Recommend using index
# for proper usage.
attr_accessor :labels
+ # Store vector data in an array
+ attr_reader :data
+ # Attach arbitrary metadata to vector (usually a hash)
+ attr_accessor :metadata
# Create a Vector object.
#
# == Arguments
#
@@ -95,15 +97,17 @@
if source.is_a?(Hash)
index = source.keys
source = source.values
else
index = opts[:index]
- source = source || []
+ source ||= []
end
- name = opts[:name]
+ name = opts[:name]
set_name name
+ @metadata = opts[:metadata] || {}
+
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
@index = try_create_index(index || @data.size)
if @index.size > @data.size
cast(dtype: :array) # NM with nils seg faults
@@ -135,15 +139,14 @@
# All the rest like .new
def self.new_with_size n, opts={}, &block
  # Work on a copy: deleting :value from the hash the caller passed in would
  # silently change their object (and fail outright on a frozen hash).
  opts  = opts.dup
  value = opts.delete(:value)

  # With a block every element is computed from its position; otherwise all
  # +n+ elements are filled with +value+ (nil when :value was not given).
  data = block ? Array.new(n, &block) : Array.new(n) { value }

  # The remaining options are handed to Daru::Vector.new untouched.
  Daru::Vector.new data, opts
end
# Create a vector using (almost) any object
# * Array: flattened
# * Range: transformed using to_a
@@ -178,11 +181,11 @@
when Array
values.concat a.flatten
when Daru::Vector
values.concat a.to_a
when Range
- values.concat a.to_a
+ values.concat a.to_a
else
values << a
end
end
Daru::Vector.new(values)
@@ -197,60 +200,22 @@
# v[:one] # => Single element
# v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
#
# # For vectors employing hierarchical multi index
#
- def [](*indexes)
- location = indexes[0]
- if @index.is_a?(MultiIndex)
- sub_index = @index[indexes]
- result =
- if sub_index.is_a?(Integer)
- @data[sub_index]
- else
- elements = sub_index.map do |tuple|
- @data[@index[tuple]]
- end
+ def [](*input_indexes)
+ # Get a proper index object
+ indexes = @index[*input_indexes]
- if !indexes[0].is_a?(Range) and indexes.size < @index.width
- sub_index = sub_index.drop_left_level indexes.size
- end
- Daru::Vector.new(
- elements, index: sub_index, name: @name, dtype: @dtype)
- end
+ # If one object is asked return it
+ return @data[indexes] if indexes.is_a? Numeric
- return result
- else
- raise TypeError, "Invalid index type #{location.inspect}.\
- \nUsage: v[:a, :b] gives elements with keys :a and :b for vector v." if location.is_a? Array
-
- unless indexes[1]
- case location
- when Range
- first = location.first
- last = location.last
- indexes = @index.slice first, last
- else
- pos = @index[location]
- if pos.is_a?(Numeric)
- return @data[pos]
- else
- indexes = pos
- end
- end
- else
- indexes = indexes.map { |e| named_index_for(e) }
- end
-
- begin
- Daru::Vector.new(
- indexes.map { |loc| @data[@index[loc]] },
- name: @name, index: indexes, dtype: @dtype)
- rescue NoMethodError
- raise IndexError, "Specified index #{pos.inspect} does not exist."
- end
- end
+ # Form a new Vector using indexes and return it
+ Daru::Vector.new(
+ indexes.map { |loc| @data[@index[loc]] },
+ name: @name, metadata: @metadata.dup, index: indexes.conform(input_indexes), dtype: @dtype
+ )
end
# Just like in Hashes, you can specify the index label of the Daru::Vector
# and assign an element at that place in the Daru::Vector.
#
@@ -263,19 +228,18 @@
# # nil
# # a 999
# # b 2
# # c 3
def []=(*location, value)
- cast(dtype: :array) if value.nil? and dtype != :array
+ cast(dtype: :array) if value.nil? && dtype != :array
- @possibly_changed_type = true if @type == :object and (value.nil? or
+ @possibly_changed_type = true if @type == :object && (value.nil? ||
value.is_a?(Numeric))
- @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
+ @possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
!value.nil?)
- location = location[0] unless @index.is_a?(MultiIndex)
- pos = @index[location]
+ pos = @index[*location]
if pos.is_a?(Numeric)
@data[pos] = value
else
begin
@@ -313,24 +277,20 @@
# after assignment/deletion etc. are complete. This is provided so that
# time is not wasted in creating the metadata for the vector each time
# assignment/deletion of elements is done. Updating data this way is called
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
def update
- if Daru.lazy_update
- set_missing_positions
- end
+ Daru.lazy_update and set_missing_positions
end
# Two vectors are equal if they have the exact same index values corresponding
# with the exact same elements. Name is ignored.
def == other
case other
when Daru::Vector
- @index == other.index and @size == other.size and
- @index.all? do |index|
- self[index] == other[index]
- end
+ @index == other.index && @size == other.size &&
+ @index.all? { |index| self[index] == other[index] }
else
super
end
end
@@ -367,16 +327,16 @@
# Define the comparator methods with metaprogramming. See documentation
# written above for functionality of each method. Use these methods with the
# `where` method to obtain the corresponding Vector/DataFrame.
{
- :eq => :==,
- :not_eq => :!=,
- :lt => :<,
- :lteq => :<=,
- :mt => :>,
- :mteq => :>=,
+ eq: :==,
+ not_eq: :!=,
+ lt: :<,
+ lteq: :<=,
+ mt: :>,
+ mteq: :>=
}.each do |method, operator|
define_method(method) do |other|
mod = Daru::Core::Query
if other.is_a?(Daru::Vector)
mod.apply_vector_operator operator, self, other
@@ -401,13 +361,12 @@
# # 2 3
# # 4 5
def in other
other = Hash[other.zip(Array.new(other.size, 0))]
Daru::Core::Query::BoolArray.new(
- @data.inject([]) do |memo, d|
- memo << (other.has_key?(d) ? true : false)
- memo
+ @data.each_with_object([]) do |d, memo|
+ memo << (other.key?(d) ? true : false)
end
)
end
# Return a new vector based on the contents of a boolean array. Use with the
@@ -445,33 +404,36 @@
# # 10 1
# # 11 5
# # 13 5
# # 15 1
def where bool_arry
- Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, self.dtype
+ Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, dtype
end
# Slices the vector with the inclusive range 0..(q - 1); +q+ defaults to 10.
def head q=10
  last = q - 1
  self[0..last]
end
# Slices the vector from position (@size - q) through (@size - 1); +q+
# defaults to 10.
def tail q=10
  # Clamp the lower bound at 0: asking for more entries than the vector holds
  # would otherwise produce a negative range start.
  first = [@size - q, 0].max
  self[first..(@size - 1)]
end
+ def empty?
+ @index.empty?
+ end
+
# Reports whether missing data is present in the Vector.
def has_missing_data?
  # True as soon as at least one position is recorded in missing_positions.
  positions = missing_positions
  !positions.empty?
end
alias :flawed? :has_missing_data?
-
# Append an element to the vector by specifying the element and index
def concat element, index
- raise IndexError, "Expected new unique index" if @index.include? index
+ raise IndexError, 'Expected new unique index' if @index.include? index
- @index = @index | [index]
+ @index |= [index]
@data[@index[index]] = element
set_size
set_missing_positions unless Daru.lazy_update
end
@@ -484,18 +446,18 @@
#
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
def cast opts={}
dt = opts[:dtype]
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
- dt == :array or dt == :nmatrix or dt == :gsl
+ dt == :array || dt == :nmatrix || dt == :gsl
@data = cast_vector_to dt unless @dtype == dt
end
# Delete an element by value
def delete element
- self.delete_at index_of(element)
+ delete_at index_of(element)
end
# Delete element by index
def delete_at index
@data.delete_at @index[index]
@@ -512,40 +474,39 @@
# Running through the data to figure out the kind of data is delayed to the
# last possible moment.
def type
return @data.nm_dtype if dtype == :nmatrix
- if @type.nil? or @possibly_changed_type
+ if @type.nil? || @possibly_changed_type
@type = :numeric
- self.each do |e|
- unless e.nil?
- unless e.is_a?(Numeric)
- @type = :object
- break
- end
- end
+ each do |e|
+ next if e.nil? || e.is_a?(Numeric)
+ @type = :object
+ break
end
@possibly_changed_type = false
end
@type
end
# Get index of element
def index_of element
- @index.key @data.index(element)
+ case dtype
+ when :array then @index.key @data.index { |x| x.eql? element }
+ else @index.key @data.index(element)
+ end
end
# Keep only unique elements of the vector along with their indexes.
def uniq
uniq_vector = @data.uniq
- new_index = uniq_vector.inject([]) do |acc, element|
+ new_index = uniq_vector.each_with_object([]) do |element, acc|
acc << index_of(element)
- acc
end
- Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
+ Daru::Vector.new uniq_vector, name: @name, metadata: @metadata.dup, index: new_index, dtype: @dtype
end
def any? &block
@data.data.any?(&block)
end
@@ -568,33 +529,38 @@
# == Usage
#
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
# # Say you want to sort these strings by length.
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
def sort opts={}
  # Options: ascending (default true). When a block is given it is used as the
  # comparator on the values; otherwise values are compared with <=> and nils
  # are grouped together, ordered among themselves by original position.
  opts = {
    ascending: true
  }.merge(opts)

  # Pair every value with its position so the index can be rebuilt afterwards.
  pairs = @data.each_with_index
  pairs =
    if block_given?
      pairs.sort { |a, b| yield(a[0], b[0]) }
    else
      pairs.sort do |(av, ai), (bv, bi)|
        if !av.nil? && !bv.nil?
          av <=> bv
        elsif av.nil? && bv.nil?
          ai <=> bi
        elsif av.nil?
          opts[:ascending] ? -1 : 1
        else
          opts[:ascending] ? 1 : -1
        end
      end
    end
  pairs.reverse! unless opts[:ascending]

  # [].transpose is [], which would leave both targets nil and make the
  # index rebuild below fail; an empty vector sorts to an empty vector.
  vector, positions = pairs.empty? ? [[], []] : pairs.transpose
  old_index = @index.to_a
  index = positions.map { |i| old_index[i] }

  Daru::Vector.new(vector, index: index, name: @name, metadata: @metadata.dup, dtype: @dtype)
end
# Just sort the data and get an Array in return using Enumerable#sort.
# Non-destructive.
def sorted_data &block
@@ -602,11 +568,11 @@
end
# Returns *true* if the value passed actually exists or is not marked as
# a *missing value*.
def exists? value
- !@missing_values.has_key?(self[index_of(value)])
+ !@missing_values.key?(self[index_of(value)])
end
# Like map, but returns a Daru::Vector with the returned values.
def recode dt=nil, &block
return to_enum(:recode) unless block_given?
@@ -622,11 +588,11 @@
@data = cast_vector_to(dt || @dtype)
self
end
# Delete an element if block returns true. Destructive.
- def delete_if &block
+ def delete_if
return to_enum(:delete_if) unless block_given?
keep_e = []
keep_i = []
each_with_index do |n, i|
@@ -643,11 +609,11 @@
self
end
# Keep an element if block returns true. Destructive.
- def keep_if &block
+ def keep_if
return to_enum(:keep_if) unless block_given?
keep_e = []
keep_i = []
each_with_index do |n, i|
@@ -665,27 +631,25 @@
self
end
# Reports all values that don't comply with a condition.
# Returns a hash with the index of data and the invalid data.
- def verify &block
+ def verify
h = {}
(0...size).each do |i|
- if !(yield @data[i])
- h[i] = @data[i]
- end
+ h[i] = @data[i] unless yield(@data[i])
end
h
end
# Return an Array with the data split by a separator.
# a=Daru::Vector.new(["a,b","c,d","a,b","d"])
# a.splitted
# =>
# [["a","b"],["c","d"],["a","b"],["d"]]
- def splitted sep=","
+ def splitted sep=','
@data.map do |s|
if s.nil?
nil
elsif s.respond_to? :split
s.split sep
@@ -706,42 +670,35 @@
# "b"=>#<Daru::Vector:0x7f2dbcc09c48
# @data=[1, 1, 0]>,
# "c"=>#<Daru::Vector:0x7f2dbcc09b08
# @data=[0, 1, 1]>}
#
- def split_by_separator sep=","
+ def split_by_separator sep=','
split_data = splitted sep
factors = split_data.flatten.uniq.compact
- out = factors.inject({}) do |h,x|
- h[x] = []
- h
- end
+ out = factors.map { |x| [x, []] }.to_h
split_data.each do |r|
if r.nil?
factors.each do |f|
out[f].push(nil)
end
else
factors.each do |f|
- out[f].push(r.include?(f) ? 1:0)
+ out[f].push(r.include?(f) ? 1 : 0)
end
end
end
- out.inject({}) do |s,v|
- s[v[0]] = Daru::Vector.new v[1]
- s
- end
+ out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
end
# Returns a Hash mapping every factor produced by #split_by_separator to the
# sum of its indicator values (each value converted with to_i, so nil counts
# as 0).
def split_by_separator_freq(sep=',')
  split_by_separator(sep).map do |factor, indicators|
    # Seed the fold with 0: without a seed the first element becomes the
    # accumulator as-is, and a leading nil row raises NoMethodError.
    [factor, indicators.inject(0) { |sum, x| sum + x.to_i }]
  end.to_h
end
def reset_index!
@index = Daru::Index.new(Array.new(size) { |i| i })
self
@@ -806,29 +763,29 @@
# # => [0.69, 0.23, 0.44, 0.71, ...]
#
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
def lag k=1
- return self.dup if k == 0
+ return dup if k == 0
dat = @data.to_a.dup
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
(0...k).each { |i| dat[i] = nil }
- Daru::Vector.new(dat, index: @index, name: @name)
+ Daru::Vector.new(dat, index: @index, name: @name, metadata: @metadata.dup)
end
def detach_index
- Daru::DataFrame.new({
+ Daru::DataFrame.new(
index: @index.to_a,
values: @data.to_a
- })
+ )
end
# Non-destructive version of #replace_nils!
def replace_nils replacement
- self.dup.replace_nils!(replacement)
+ dup.replace_nils!(replacement)
end
# number of non-missing elements
def n_valid
@size - missing_positions.size
@@ -855,62 +812,51 @@
end
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
# the stored GSL::Vector object.
def to_gsl
- if Daru.has_gsl?
- if dtype == :gsl
- return @data.data
- else
- GSL::Vector.alloc only_valid(:array).to_a
- end
- else
- raise NoMethodError, "Install gsl-nmatrix for access to this functionality."
- end
+ raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
+ dtype == :gsl ? @data.data : GSL::Vector.alloc(only_valid(:array).to_a)
end
- # Convert to hash. Hash keys are indexes and values are the correspoding elements
- def to_hash
- @index.inject({}) do |hsh, index|
- hsh[index] = self[index]
- hsh
- end
+ # Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
+ def to_h
+ @index.map { |index| [index, self[index]] }.to_h
end
# Return an array
def to_a
@data.to_a
end
# Convert the hash from to_h to json. Any arguments are forwarded to
# Hash#to_json, so a generator state handed in by the JSON library
# (e.g. pretty-printing settings) is applied rather than dropped.
def to_json(*args)
  to_h.to_json(*args)
end
# Convert to html for iruby
def to_html threshold=30
name = @name || 'nil'
- html = "<table>" +
- "<tr>" +
- "<th colspan=\"2\">" +
- "Daru::Vector:#{self.object_id} " + " size: #{size}" +
- "</th>" +
- "</tr>"
+ html = '<table>' \
+ '<tr>' \
+ '<th colspan="2">' \
+ "Daru::Vector:#{object_id} " + " size: #{size}" \
+ '</th>' \
+ '</tr>'
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
@index.each_with_index do |index, num|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
- if num > threshold
- html += '<tr><td>...</td><td>...</td></tr>'
+ next if num <= threshold
+ html += '<tr><td>...</td><td>...</td></tr>'
- last_index = @index.to_a.last
- html += '<tr>' +
- '<td>' + last_index.to_s + '</td>' +
- '<td>' + self[last_index].to_s + '</td>' +
- '</tr>'
- break
- end
+ last_index = @index.to_a.last
+ html += '<tr>' \
+ '<td>' + last_index.to_s + '</td>' \
+ '<td>' + self[last_index].to_s + '</td>' \
+ '</tr>'
+ break
end
html += '</table>'
html
end
@@ -918,61 +864,65 @@
def to_s
to_html
end
# Create a summary of the Vector using Report Builder.
- def summary(method = :to_text)
+ def summary(method=:to_text)
ReportBuilder.new(no_title: true).add(self).send(method)
end
def report_building b
- b.section(:name => name) do |s|
+ b.section(name: name) do |s|
s.text "n :#{size}"
s.text "n valid:#{n_valid}"
if @type == :object
s.text "factors: #{factors.to_a.join(',')}"
s.text "mode: #{mode}"
- s.table(:name => "Distribution") do |t|
- frequencies.sort_by { |a| a.to_s }.each do |k,v|
+ s.table(name: 'Distribution') do |t|
+ frequencies.sort_by(&:to_s).each do |k,v|
key = @index.include?(k) ? @index[k] : k
- t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
+ t.row [key, v, ('%0.2f%%' % (v.quo(n_valid)*100))]
end
end
end
- s.text "median: #{median.to_s}" if (@type==:numeric or @type==:numeric)
+ s.text "median: #{median}" if @type==:numeric || @type==:numeric
if @type==:numeric
- s.text "mean: %0.4f" % mean
+ s.text 'mean: %0.4f' % mean
if sd
- s.text "std.dev.: %0.4f" % sd
- s.text "std.err.: %0.4f" % se
- s.text "skew: %0.4f" % skew
- s.text "kurtosis: %0.4f" % kurtosis
+ s.text 'std.dev.: %0.4f' % sd
+ s.text 'std.err.: %0.4f' % se
+ s.text 'skew: %0.4f' % skew
+ s.text 'kurtosis: %0.4f' % kurtosis
end
end
end
end
# Overrides original inspect for pretty printing in irb
def inspect spacing=20, threshold=15
- longest = [@name.to_s.size,
- (@index.to_a.map(&:to_s).map(&:size).max || 0),
- (@data .map(&:to_s).map(&:size).max || 0),
- 'nil'.size].max
+ longest =
+ [
+ @name.to_s.size,
+ (@index.to_a.map(&:to_s).map(&:size).max || 0),
+ (@data.map(&:to_s).map(&:size).max || 0),
+ 3 # 'nil'.size
+ ].max
- content = ""
+ content = ''
longest = spacing if longest > spacing
name = @name || 'nil'
+ metadata = @metadata || 'nil'
formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"
- content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " + name.to_s + " @size = " + size.to_s + " >"
+ content += "\n#<#{self.class}:#{object_id} @name = #{name} @metadata = #{metadata} @size = #{size} >"
- content += sprintf formatter, "", name
+ content += formatter % ['', name]
@index.each_with_index do |index, num|
- content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s
+ content += formatter % [index.to_s, (self[*index] || 'nil').to_s]
if num > threshold
- content += sprintf formatter, '...', '...'
+ content += formatter % ['...', '...']
break
end
end
content += "\n"
@@ -980,29 +930,25 @@
end
# Create a new vector with a different index, and preserve the indexing of
# current elements.
def reindex new_index
- vector = Daru::Vector.new([], index: new_index, name: @name)
+ vector = Daru::Vector.new([], index: new_index, name: @name, metadata: @metadata.dup)
new_index.each do |idx|
- if @index.include?(idx)
- vector[idx] = self[idx]
- else
- vector[idx] = nil
- end
+ vector[idx] = @index.include?(idx) ? self[idx] : nil
end
vector
end
# Replaces the vector's index with +idx+ and returns self.
#
# Raises ArgumentError when +idx+ is not a Daru::Index (or subclass) or when
# its size differs from the vector's size.
def index= idx
  # Validate the type first so a non-index argument gets the descriptive
  # error instead of failing inside the size comparison.
  raise ArgumentError, 'Can only assign type Index and its subclasses.' unless
    idx.is_a?(Daru::Index)

  # Report the size of the index that was supplied, not of the current one.
  raise ArgumentError,
        "Size of supplied index #{idx.size} does not match size of Vector" if
    idx.size != size

  @index = idx
  self
end
@@ -1018,11 +964,11 @@
@name = new_name
end
# Duplicate elements and indexes
def dup
- Daru::Vector.new @data.dup, name: @name, index: @index.dup
+ Daru::Vector.new @data.dup, name: @name, metadata: @metadata.dup, index: @index.dup
end
# == Bootstrap
# Generate +nr+ resamples (with replacement) of size +s+
# from vector, computing each estimate from +estimators+
@@ -1040,11 +986,11 @@
# of length +nr+ containing the computed resample estimates.
def bootstrap(estimators, nr, s=nil)
s ||= size
h_est, es, bss = prepare_bootstrap(estimators)
- nr.times do |i|
+ nr.times do
bs = sample_with_replacement(s)
es.each do |estimator|
bss[estimator].push(h_est[estimator].call(bs))
end
end
@@ -1077,24 +1023,22 @@
raise "n should be divisible by k:#{k}" unless size % k==0
nb = (size / k).to_i
h_est, es, ps = prepare_bootstrap(estimators)
- est_n = es.inject({}) do |h,v|
- h[v] = h_est[v].call(self)
- h
- end
+ est_n = es.map { |v| [v, h_est[v].call(self)] }.to_h
nb.times do |i|
other = @data.dup
other.slice!(i*k, k)
other = Daru::Vector.new other
es.each do |estimator|
# Add pseudovalue
ps[estimator].push(
- nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other))
+ nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other)
+ )
end
end
es.each do |est|
ps[est] = Daru::Vector.new ps[est]
@@ -1112,22 +1056,22 @@
# @duplicate [Symbol] In case no missing data is found in the
# vector, setting this to false will return the same vector.
# Otherwise, a duplicate will be returned irrespective of
# presence of missing data.
def only_valid as_a=:vector, duplicate=true
- return self.dup if !has_missing_data? and as_a == :vector and duplicate
- return self if !has_missing_data? and as_a == :vector and !duplicate
- return self.to_a if !has_missing_data? and as_a != :vector
+ return dup if !has_missing_data? && as_a == :vector && duplicate
+ return self if !has_missing_data? && as_a == :vector && !duplicate
+ return to_a if !has_missing_data? && as_a != :vector
new_index = @index.to_a - missing_positions
new_vector = new_index.map do |idx|
self[idx]
end
return new_vector if as_a != :vector
- Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
+ Daru::Vector.new new_vector, index: new_index, name: @name, metadata: @metadata.dup, dtype: dtype
end
# Returns a Vector containing only missing data (preserves indexes).
def only_missing as_a=:vector
if as_a == :vector
@@ -1141,34 +1085,36 @@
# but non-Numeric objects are excluded. Preserves index.
def only_numerics
numeric_indexes = []
each_with_index do |v, i|
- numeric_indexes << i if(v.kind_of?(Numeric) or @missing_values.has_key?(v))
+ numeric_indexes << i if v.is_a?(Numeric) || @missing_values.key?(v)
end
self[*numeric_indexes]
end
# Returns the database type for the vector, according to its content
def db_type
  # Classifies the vector by the string form of its elements, checked in
  # this order: anything date-like -> DATE, any character outside
  # digits/e/./- -> VARCHAR, any dot -> DOUBLE, otherwise INTEGER.
  #
  # any? is used instead of find: find returns the matching element itself,
  # so a matching +false+ element would be mistaken for "no match".
  strings = @data.map(&:to_s)

  if strings.any? { |s| s =~ /\d{2,2}-\d{2,2}-\d{4,4}/ || s =~ /\d{4,4}-\d{2,2}-\d{2,2}/ }
    'DATE'
  elsif strings.any? { |s| s =~ /[^0-9e.-]/ }
    'VARCHAR (255)'
  elsif strings.any? { |s| s =~ /\./ }
    'DOUBLE'
  else
    'INTEGER'
  end
end
# Copies the structure of the vector (i.e. the index, size, etc.) and fills
# all values with nils.
def clone_structure
- Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
+ Daru::Vector.new(([nil]*@size), name: @name, metadata: @metadata.dup, index: @index.dup)
end
# Save the vector to a file
#
# == Arguments
@@ -1176,153 +1122,96 @@
# * filename - Path of file where the vector is to be saved
def save filename
Daru::IO.save self, filename
end
- def _dump(depth) # :nodoc:
- Marshal.dump({
- data: @data.to_a,
- dtype: @dtype,
- name: @name,
- index: @index,
- missing_values: @missing_values})
+ def _dump(*) # :nodoc:
+ Marshal.dump(
+ data: @data.to_a,
+ dtype: @dtype,
+ name: @name,
+ metadata: @metadata,
+ index: @index,
+ missing_values: @missing_values
+ )
end
def self._load(data) # :nodoc:
h = Marshal.load(data)
- Daru::Vector.new(h[:data], index: h[:index],
- name: h[:name], dtype: h[:dtype], missing_values: h[:missing_values])
+ Daru::Vector.new(h[:data],
+ index: h[:index],
+ name: h[:name], metadata: h[:metadata],
+ dtype: h[:dtype], missing_values: h[:missing_values])
end
- def daru_vector *name
+ def daru_vector(*)
self
end
alias :dv :daru_vector
# Dynamic element access by index label:
#   vector.foo = 1  assigns to self[:foo]
#   vector.foo      reads self[:foo] when has_index?(:foo) is true
# Anything else falls through to the default behaviour (NoMethodError).
def method_missing(name, *args, &block)
  if name =~ /\A(.+)=\z/
    # Assign under the label itself: the method name still carries the
    # trailing "=", which must not become part of the key.
    self[Regexp.last_match(1).to_sym] = args[0]
  elsif has_index?(name)
    self[name]
  else
    super(name, *args, &block)
  end
end
- private
+ private
# For an array or hash of estimators methods, returns
# an array with three elements
# 1.- A hash with estimators names as keys and lambdas as values
# 2.- An array with estimators names
# 3.- A Hash with estimators names as keys and empty arrays as values
def prepare_bootstrap(estimators)
h_est = estimators
- h_est = [h_est] unless h_est.is_a?(Array) or h_est.is_a?(Hash)
+ h_est = [h_est] unless h_est.is_a?(Array) || h_est.is_a?(Hash)
if h_est.is_a? Array
- h_est = h_est.inject({}) do |h, est|
- h[est] = lambda { |v| Daru::Vector.new(v).send(est) }
- h
- end
+ h_est = h_est.map do |est|
+ [est, ->(v) { Daru::Vector.new(v).send(est) }]
+ end.to_h
end
- bss = h_est.keys.inject({}) { |h,v| h[v] = []; h }
+ bss = h_est.keys.map { |v| [v, []] }.to_h
[h_est, h_est.keys, bss]
end
- def quick_sort vector, index, order, &block
- recursive_quick_sort vector, index, order, 0, @size-1, &block
- [vector, index]
- end
-
- def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
- if left_lower < right_upper
- left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
- if left_upper - left_lower < right_upper - right_lower
- recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
- recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
- else
- recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
- recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
- end
- end
- end
-
- def partition vector, index, order, left_lower, right_upper, &block
- mindex = (left_lower + right_upper) / 2
- mvalue = vector[mindex]
- i = left_lower
- j = right_upper
- opposite_order = order == :ascending ? :descending : :ascending
-
- i += 1 while(keep?(vector[i], mvalue, order, &block))
- j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
-
- while i < j - 1
- vector[i], vector[j] = vector[j], vector[i]
- index[i], index[j] = index[j], index[i]
- i += 1
- j -= 1
-
- i += 1 while(keep?(vector[i], mvalue, order, &block))
- j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
- end
-
- if i <= j
- if i < j
- vector[i], vector[j] = vector[j], vector[i]
- index[i], index[j] = index[j], index[i]
- end
- i += 1
- j -= 1
- end
-
- [j,i]
- end
-
# Yields +a+ and +b+ to the comparator block and reports whether the result
# places +a+ strictly before +b+ for the requested +order+
# (:ascending or :descending). Ties, unknown orders and non-numeric
# comparator results all give false.
def keep? a, b, order
  cmp = yield(a, b)
  # A comparator may return nil for incomparable values.
  return false unless cmp.is_a?(Numeric)

  # Go by sign rather than exact -1/1 so comparators such as
  # `a.length - b.length` are honoured as well.
  case order
  when :ascending  then cmp < 0
  when :descending then cmp > 0
  else false
  end
end
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
# @dtype variable is set and the underlying data type of vector changed.
def cast_vector_to dtype, source=nil, nm_dtype=nil
source = @data.to_a if source.nil?
new_vector =
- case dtype
- when :array then Daru::Accessors::ArrayWrapper.new(source, self)
- when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
- when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
- when :mdarray then raise NotImplementedError, "MDArray not yet supported."
- else raise "Unknown dtype #{dtype}"
- end
+ case dtype
+ when :array then Daru::Accessors::ArrayWrapper.new(source, self)
+ when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
+ when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
+ when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
+ else raise "Unknown dtype #{dtype}"
+ end
@dtype = dtype || :array
new_vector
end
- def named_index_for index
- if @index.include? index
- index
- elsif @index.key index
- @index.key index
- else
- raise IndexError, "Specified index #{index} does not exist."
- end
- end
-
def index_for index
if @index.include?(index)
@index[index]
elsif index.is_a?(Numeric)
index
@@ -1331,29 +1220,29 @@
def set_size
@size = @data.size
end
- def set_name name
+ def set_name name # rubocop:disable Style/AccessorMethodName
@name =
- if name.is_a?(Numeric) then name
- elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
- elsif name then name # anything but Numeric or nil
- else
- nil
- end
+ if name.is_a?(Numeric) then name
+ elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
+ elsif name then name # anything but Numeric or nil
+ else
+ nil
+ end
end
def set_missing_positions
@missing_positions = []
@index.each do |e|
- @missing_positions << e if (@missing_values.has_key?(self[e]))
+ @missing_positions << e if @missing_values.key?(self[e])
end
end
def try_create_index potential_index
- if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
+ if potential_index.is_a?(Daru::MultiIndex) || potential_index.is_a?(Daru::Index)
potential_index
else
Daru::Index.new(potential_index)
end
end
@@ -1363,14 +1252,16 @@
pos ? @data[pos] : nil
end
# Setup missing_values. The missing_values instance variable is set
# as a Hash for faster lookup times.
- def set_missing_values values_arry
+ def set_missing_values values_arry # rubocop:disable Style/AccessorMethodName
@missing_values = {}
@missing_values[nil] = 0
if values_arry
values_arry.each do |e|
+ # If dtype is :gsl then missing values have to be converted to float
+ e = e.to_f if dtype == :gsl && e.is_a?(Numeric)
@missing_values[e] = 0
end
end
end
end