lib/daru/vector.rb in daru-0.1.1 vs lib/daru/vector.rb in daru-0.1.2
- old
+ new
@@ -14,11 +14,11 @@
include Daru::Maths::Statistics::Vector
include Daru::Plotting::Vector if Daru.has_nyaplot?
# Iterate over the underlying data, yielding to the supplied block.
# Without a block an Enumerator for :each is returned instead; with a
# block the vector itself is returned so that calls can be chained.
def each(&block)
return to_enum(:each) unless block_given?
-
+
@data.each(&block)
self
end
def each_index(&block)
@@ -59,37 +59,37 @@
# Store a hash of labels for values. Supplementary only. Recommend using index
# for proper usage.
attr_accessor :labels
# Create a Vector object.
- #
+ #
# == Arguments
- #
- # @param source[Array,Hash] - Supply elements in the form of an Array or a
- # Hash. If Array, a numeric index will be created if not supplied in the
- # options. Specifying more index elements than actual values in *source*
- # will insert *nil* into the surplus index elements. When a Hash is specified,
- # the keys of the Hash are taken as the index elements and the corresponding
+ #
+ # @param source[Array,Hash] - Supply elements in the form of an Array or a
+ # Hash. If Array, a numeric index will be created if not supplied in the
+ # options. Specifying more index elements than actual values in *source*
+ # will insert *nil* into the surplus index elements. When a Hash is specified,
+ # the keys of the Hash are taken as the index elements and the corresponding
# values as the values that populate the vector.
- #
+ #
# == Options
- #
+ #
# * +:name+ - Name of the vector
- #
+ #
# * +:index+ - Index of the vector
- #
- # * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
+ #
+ # * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
# Default :array.
- #
+ #
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
# further information on supported data type.
- #
+ #
# * +:missing_values+ - An Array of the values that are to be treated as 'missing'.
# nil is the default missing value.
- #
+ #
# == Usage
- #
+ #
# vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
# vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
def initialize source, opts={}
index = nil
if source.is_a?(Hash)
@@ -102,11 +102,11 @@
name = opts[:name]
set_name name
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
@index = try_create_index(index || @data.size)
-
+
if @index.size > @data.size
cast(dtype: :array) # NM with nils seg faults
(@index.size - @data.size).times { @data << nil }
elsif @index.size < @data.size
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
@@ -118,15 +118,15 @@
set_size
end
# Create a new vector by specifying the size and an optional value
# and block to generate values.
- #
+ #
# == Description
#
# The *new_with_size* class method lets you create a Daru::Vector
- # by specifying the size as the argument. The optional block, if
+ # by specifying the size as the argument. The optional block, if
# supplied, is run once for populating each element in the Vector.
#
# The result of each run of the block is the value that is ultimately
# assigned to that position in the Vector.
#
@@ -147,19 +147,19 @@
# Create a vector using (almost) any object
# * Array: flattened
# * Range: transformed using to_a
# * Daru::Vector
# * Numeric and string values
- #
+ #
# == Description
#
# The `Vector.[]` class method creates a vector from almost any
# object that has a `#to_a` method defined on it. It is similar
# to R's `c` method.
- #
+ #
# == Usage
- #
+ #
# a = Daru::Vector[1,2,3,4,6..10]
# #=>
# # <Daru::Vector:99448510 @name = nil @size = 9 >
# # nil
# # 0 1
@@ -187,25 +187,25 @@
end
Daru::Vector.new(values)
end
# Get one or more elements with specified index or a range.
- #
+ #
# == Usage
# # For vectors employing single layer Index
- #
+ #
# v[:one, :two] # => Daru::Vector with indexes :one and :two
# v[:one] # => Single element
# v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
- #
+ #
# # For vectors employing hierarchical multi index
- #
+ #
def [](*indexes)
location = indexes[0]
if @index.is_a?(MultiIndex)
sub_index = @index[indexes]
- result =
+ result =
if sub_index.is_a?(Integer)
@data[sub_index]
else
elements = sub_index.map do |tuple|
@data[@index[tuple]]
@@ -218,10 +218,13 @@
elements, index: sub_index, name: @name, dtype: @dtype)
end
return result
else
+ raise TypeError, "Invalid index type #{location.inspect}.\
+ \nUsage: v[:a, :b] gives elements with keys :a and :b for vector v." if location.is_a? Array
+
unless indexes[1]
case location
when Range
first = location.first
last = location.last
@@ -236,64 +239,72 @@
end
else
indexes = indexes.map { |e| named_index_for(e) }
end
- Daru::Vector.new(
- indexes.map { |loc| @data[@index[loc]] },
- name: @name, index: indexes, dtype: @dtype)
+ begin
+ Daru::Vector.new(
+ indexes.map { |loc| @data[@index[loc]] },
+ name: @name, index: indexes, dtype: @dtype)
+ rescue NoMethodError
+ raise IndexError, "Specified index #{pos.inspect} does not exist."
+ end
end
end
# Just like in Hashes, you can specify the index label of the Daru::Vector
# and assign an element at that place in the Daru::Vector.
- #
+ #
# == Usage
- #
+ #
# v = Daru::Vector.new([1,2,3], index: [:a, :b, :c])
# v[:a] = 999
- # #=>
+ # #=>
# ##<Daru::Vector:90257920 @name = nil @size = 3 >
# # nil
# # a 999
# # b 2
# # c 3
# Element assignment: store +value+ at the position(s) that +location+
# resolves to in @index. For anything but a MultiIndex only the first
# location argument is used; for a MultiIndex the whole argument list is
# handed to the index lookup.
#
# Raises IndexError (0.1.2) when the lookup result cannot be iterated.
def []=(*location, value)
# Storing nil forces the vector onto the :array dtype first.
cast(dtype: :array) if value.nil? and dtype != :array
# Mark the cached @type as possibly stale: an :object vector receiving
# nil/Numeric, or a :numeric vector receiving a non-numeric, non-nil value.
- @possibly_changed_type = true if @type == :object and (value.nil? or
+ @possibly_changed_type = true if @type == :object and (value.nil? or
value.is_a?(Numeric))
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
!value.nil?)
location = location[0] unless @index.is_a?(MultiIndex)
pos = @index[location]
# A Numeric lookup result is a single position; any other result is
# iterated and each entry is assigned through this same method.
if pos.is_a?(Numeric)
@data[pos] = value
else
- pos.each { |tuple| self[tuple] = value }
# 0.1.2: a lookup result without #each (NOTE(review): presumably nil for an
# unknown label - confirm against Index#[]) is reported as IndexError.
# NOTE(review): the rescue also covers NoMethodError raised inside the block.
+ begin
+ pos.each { |tuple| self[tuple] = value }
+ rescue NoMethodError
+ raise IndexError, "Specified index #{pos.inspect} does not exist."
+ end
end
# Refresh the cached size; missing positions are refreshed only when
# Daru.lazy_update is falsy.
set_size
set_missing_positions unless Daru.lazy_update
end
# The values to be treated as 'missing'. *nil* is the default missing
# type. To set missing values see the missing_values= method.
# Returns the keys of the internal @missing_values collection, i.e. the
# values currently treated as 'missing'.
def missing_values
- @missing_values.keys
+ @missing_values.keys
end
# Assign an Array to treat certain values as 'missing'.
- #
+ #
# == Usage
- #
+ #
# v = Daru::Vector.new [1,2,3,4,5]
# v.missing_values = [3]
# v.update
- # v.missing_positions
+ # v.missing_positions
# #=> [2]
# Replace the set of values that count as 'missing'.
#
# The new values are recorded through set_missing_values; the missing
# positions are then recomputed immediately unless Daru.lazy_update is set.
def missing_values=(values)
  set_missing_values(values)
  set_missing_positions if !Daru.lazy_update
end
@@ -368,25 +379,25 @@
define_method(method) do |other|
mod = Daru::Core::Query
if other.is_a?(Daru::Vector)
mod.apply_vector_operator operator, self, other
else
- mod.apply_scalar_operator operator, @data,other
+ mod.apply_scalar_operator operator, @data,other
end
end
end
alias :gt :mt
- alias :gteq :mteq
+ alias :gteq :mteq
# Comparator for checking if any of the elements in *other* exist in self.
#
# @param [Array, Daru::Vector] other A collection which has elements that
# need to be checked for in self.
# @example Usage of `in`.
# vector = Daru::Vector.new([1,2,3,4,5])
# vector.where(vector.in([3,5]))
- # #=>
+ # #=>
# ##<Daru::Vector:82215960 @name = nil @size = 2 >
# # nil
# # 2 3
# # 4 5
def in other
@@ -398,36 +409,36 @@
end
)
end
# Return a new vector based on the contents of a boolean array. Use with the
- # comparator methods to obtain meaningful results. See this notebook for
+ # comparator methods to obtain meaningful results. See this notebook for
# a good overview of using #where.
#
- # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
+ # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
# collection containing the true or false values. Each element in the Vector
# corresponding to a `true` in the bool_arry will be returned along with its
# index.
# @example Usage of #where.
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
#
# # Simple logic statement passed to #where.
# vector.where(vector.eq(5).or(vector.eq(1)))
- # # =>
+ # # =>
# ##<Daru::Vector:77626210 @name = nil @size = 7 >
# # nil
# # 2 5
# # 4 5
# # 7 5
# # 10 1
# # 11 5
# # 13 5
# # 15 1
- #
+ #
# # A somewhat more complex logic statement
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
- # #=>
+ # #=>
# ##<Daru::Vector:81072310 @name = nil @size = 7 >
# # nil
# # 2 5
# # 4 5
# # 7 5
@@ -462,29 +473,29 @@
@data[@index[index]] = element
set_size
set_missing_positions unless Daru.lazy_update
end
- alias :push :concat
+ alias :push :concat
alias :<< :concat
# Cast a vector to a new data type.
- #
+ #
# == Options
- #
+ #
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
# Convert the underlying storage to the dtype given in opts[:dtype].
# Accepts :array, :nmatrix or :gsl; anything else raises ArgumentError.
# Nothing is converted when the vector already has the requested dtype.
def cast opts={}
dt = opts[:dtype]
- raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
dt == :array or dt == :nmatrix or dt == :gsl
# cast_vector_to is called without a source here, so it works from the
# current data.
@data = cast_vector_to dt unless @dtype == dt
end
# Delete an element by value
# Delete by value: the index label of +element+ is looked up with index_of
# and that entry is then removed through delete_at.
# NOTE(review): behaviour for a value that is not present depends on what
# index_of returns in that case - not visible here.
def delete element
- self.delete_at index_of(element)
+ self.delete_at index_of(element)
end
# Delete element by index
def delete_at index
@data.delete_at @index[index]
@@ -495,13 +506,13 @@
end
# The type of data contained in the vector. Can be :object or :numeric. If
# the underlying dtype is an NMatrix, this method will return the data type
# of the NMatrix object.
- #
+ #
# Running through the data to figure out the kind of data is delayed to the
- # last possible moment.
+ # last possible moment.
def type
return @data.nm_dtype if dtype == :nmatrix
if @type.nil? or @possibly_changed_type
@type = :numeric
@@ -525,12 +536,12 @@
end
# Keep only unique elements of the vector along with their indexes.
# Build a new vector from the distinct values of this one, keeping name and
# dtype. The index of the result is assembled by looking up each distinct
# value with index_of.
def uniq
uniq_vector = @data.uniq
# Collect one index label per distinct value, in the order of uniq_vector.
# NOTE(review): presumably index_of yields the label of the first
# occurrence - confirm.
- new_index = uniq_vector.inject([]) do |acc, element|
- acc << index_of(element)
+ new_index = uniq_vector.inject([]) do |acc, element|
+ acc << index_of(element)
acc
end
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
end
@@ -542,41 +553,51 @@
# True when the given block holds for every element of the wrapped data
# (delegates to #all? on @data.data).
def all?(&block)
  wrapped = @data.data
  wrapped.all?(&block)
end
# Sorts a vector according to its values. If a block is specified, the contents
- # will be evaluated and data will be swapped whenever the block evaluates
+ # will be evaluated and data will be swapped whenever the block evaluates
# to *true*. Defaults to ascending order sorting. Any missing values will be
# put at the end of the vector. Preserves indexing. Default sort algorithm is
# quick sort.
- #
+ #
# == Options
- #
+ #
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
- #
+ #
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
# == Usage
- #
+ #
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
# # Say you want to sort these strings by length.
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
# Return a new, sorted Daru::Vector (name and dtype preserved). Values and
# index labels are passed together to the sorting routine named by
# opts[:type], so each value keeps its label.
def sort opts={}, &block
# Caller-supplied options override these defaults.
opts = {
ascending: true,
type: :quick_sort
}.merge(opts)
# Default comparator, used only when no block is given.
# 0.1.1 used a plain <=>; 0.1.2 makes it nil-aware: two non-nil values are
# compared with <=>, two nils compare equal (0), a nil on the left yields
# -1 and a nil on the right yields 1. `return` inside the lambda only
# leaves the lambda.
- block = lambda { |a,b| a <=> b } unless block
-
+ block = lambda { |a,b|
+ return a <=> b if !(a.nil? || b.nil?)
+
+ if a.nil? && b.nil?
+ 0
+ elsif a.nil?
+ -1
+ else
+ 1
+ end
+ } unless block
+
order = opts[:ascending] ? :ascending : :descending
# Dispatch to the method named by opts[:type]; it receives a copy of the
# data, the index labels, the order and the comparator.
# NOTE(review): any method name supplied in opts[:type] is invoked via send.
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
index = Daru::Index.new index
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
end
- # Just sort the data and get an Array in return using Enumerable#sort.
+ # Just sort the data and get an Array in return using Enumerable#sort.
# Non-destructive.
# Return the vector's values as a sorted Array; an optional comparison block
# is forwarded to the sort call.
def sorted_data(&block)
  elements = @data.to_a
  elements.sort(&block)
end
@@ -726,16 +747,16 @@
self
end
# Returns a vector which has *true* in the position where the element in self
# is nil, and false otherwise.
- #
+ #
# == Usage
- #
+ #
# v = Daru::Vector.new([1,2,4,nil])
# v.is_nil?
- # # =>
+ # # =>
# #<Daru::Vector:89421000 @name = nil @size = 4 >
# # nil
# # 0 false
# # 1 false
# # 2 false
@@ -759,13 +780,13 @@
nil_truth_vector
end
# Replace all nils in the vector with the value passed as an argument. Destructive.
# See #replace_nils for non-destructive version
- #
+ #
# == Arguments
- #
+ #
# * +replacement+ - The value which should replace all nils
def replace_nils! replacement
missing_positions.each do |idx|
self[idx] = replacement
end
@@ -817,13 +838,13 @@
# Whether +index+ is one of the labels held by this vector's index
# (delegates to @index.include?).
def has_index?(index)
  @index.include?(index)
end
# Convert Vector to a horizontal or vertical Ruby Matrix.
- #
+ #
# == Arguments
- #
+ #
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
def to_matrix axis=:horizontal
if axis == :horizontal
Matrix[to_a]
elsif axis == :vertical
@@ -859,32 +880,32 @@
# Return the vector's values as produced by calling to_a on the underlying
# data object.
def to_a
@data.to_a
end
# Convert the hash from to_hash to json
# Serialize the vector as JSON, using the Hash produced by #to_hash.
#
# @param args [Array] optional generator state / options. They are forwarded
#   to Hash#to_json so that formatting options and the state handed down by
#   an enclosing JSON generation are honoured instead of silently dropped.
# @return [String] the JSON document
def to_json(*args)
  to_hash.to_json(*args)
end
# Convert to html for iruby
def to_html threshold=30
name = @name || 'nil'
- html = "<table>" +
+ html = "<table>" +
"<tr>" +
- "<th colspan=\"2\">" +
- "Daru::Vector:#{self.object_id} " + " size: #{size}" +
+ "<th colspan=\"2\">" +
+ "Daru::Vector:#{self.object_id} " + " size: #{size}" +
"</th>" +
"</tr>"
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
@index.each_with_index do |index, num|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
-
+
if num > threshold
html += '<tr><td>...</td><td>...</td></tr>'
last_index = @index.to_a.last
- html += '<tr>' +
+ html += '<tr>' +
'<td>' + last_index.to_s + '</td>' +
'<td>' + self[last_index].to_s + '</td>' +
'</tr>'
break
end
@@ -933,11 +954,11 @@
end
# Overrides the original inspect for pretty printing in irb
def inspect spacing=20, threshold=15
longest = [@name.to_s.size,
- (@index.to_a.map(&:to_s).map(&:size).max || 0),
+ (@index.to_a.map(&:to_s).map(&:size).max || 0),
(@data .map(&:to_s).map(&:size).max || 0),
'nil'.size].max
content = ""
longest = spacing if longest > spacing
@@ -973,34 +994,34 @@
vector
end
# Replace the vector's index.
#
# @param idx [Daru::Index] the new index; must be a Daru::Index (or a
#   subclass) holding exactly as many entries as the vector has elements.
# @return [self]
# @raise [ArgumentError] if +idx+ is not a Daru::Index, or if its size
#   differs from the size of the vector.
def index= idx
  # Check the type first so the size comparison only runs on index objects.
  raise ArgumentError, "Can only assign type Index and its subclasses." unless
    idx.kind_of?(Daru::Index)
  # Report the size of the *supplied* index (idx), not of the current one.
  raise ArgumentError,
    "Size of supplied index #{idx.size} does not match size of Vector" if
    idx.size != size

  @index = idx
  self
end
# Give the vector a new name
- #
+ #
# @param new_name [Symbol] The new name.
# Set @name to +new_name+.
#
# Both branches store the value unchanged; they differ only in the return
# value: a Numeric name leaves through the bare `return` (nil), any other
# name returns the assigned value.
def rename new_name
if new_name.is_a?(Numeric)
- @name = new_name
+ @name = new_name
return
end
-
+
@name = new_name
end
# Duplicate elements and indexes
# Build a new Daru::Vector from dup'ed copies of @data and @index, carrying
# over @name.
# NOTE(review): dtype and missing values are not passed along explicitly -
# confirm whether the constructor recovers them from the duplicated data.
- def dup
+ def dup
Daru::Vector.new @data.dup, name: @name, index: @index.dup
end
# == Bootstrap
# Generate +nr+ resamples (with replacement) of size +s+
@@ -1080,19 +1101,19 @@
end
Daru::DataFrame.new ps
end
# Creates a new vector consisting only of non-nil data
- #
+ #
# == Arguments
- #
+ #
# @as_a [Symbol] Passing :array will return only the elements
# as an Array. Otherwise will return a Daru::Vector.
- #
+ #
# @duplicate [Symbol] In case no missing data is found in the
# vector, setting this to false will return the same vector.
- # Otherwise, a duplicate will be returned irrespective of
+ # Otherwise, a duplicate will be returned irrespective of
# presence of missing data.
def only_valid as_a=:vector, duplicate=true
return self.dup if !has_missing_data? and as_a == :vector and duplicate
return self if !has_missing_data? and as_a == :vector and !duplicate
return self.to_a if !has_missing_data? and as_a != :vector
@@ -1101,11 +1122,11 @@
new_vector = new_index.map do |idx|
self[idx]
end
return new_vector if as_a != :vector
-
+
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
end
# Returns a Vector containing only missing data (preserves indexes).
def only_missing as_a=:vector
@@ -1147,30 +1168,30 @@
# Create a vector with the same name and a copy of the index, but with every
# one of its @size values set to nil.
def clone_structure
  blanks = [nil] * @size
  Daru::Vector.new(blanks, name: @name, index: @index.dup)
end
# Save the vector to a file
- #
+ #
# == Arguments
- #
+ #
# * filename - Path of file where the vector is to be saved
# Persist this vector by handing it, together with the target path, to
# Daru::IO.save.
def save(filename)
  Daru::IO.save(self, filename)
end
# Marshal hook: serializes a Hash holding the values (as an Array), dtype,
# name, index and missing-values collection. The +depth+ argument is not used.
def _dump(depth) # :nodoc:
Marshal.dump({
- data: @data.to_a,
- dtype: @dtype,
- name: @name,
+ data: @data.to_a,
+ dtype: @dtype,
+ name: @name,
index: @index,
missing_values: @missing_values})
end
# Marshal hook: rebuilds a vector from a Hash with the keys :data, :index,
# :name, :dtype and :missing_values.
# NOTE(review): Marshal.load can instantiate arbitrary objects; only feed it
# data from a trusted source.
def self._load(data) # :nodoc:
h = Marshal.load(data)
- Daru::Vector.new(h[:data], index: h[:index],
+ Daru::Vector.new(h[:data], index: h[:index],
name: h[:name], dtype: h[:dtype], missing_values: h[:missing_values])
end
def daru_vector *name
self
@@ -1259,13 +1280,12 @@
[j,i]
end
def keep? a, b, order, &block
- return false if a.nil? or b.nil?
eval = block.call(a,b)
- if order == :ascending
+ if order == :ascending
return true if eval == -1
return false if eval == 1
elsif order == :descending
return false if eval == -1
return true if eval == 1
@@ -1276,11 +1296,11 @@
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
# @dtype variable is set and the underlying data type of vector changed.
def cast_vector_to dtype, source=nil, nm_dtype=nil
source = @data.to_a if source.nil?
- new_vector =
+ new_vector =
case dtype
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
when :mdarray then raise NotImplementedError, "MDArray not yet supported."
@@ -1312,12 +1332,12 @@
# Refresh the cached @size from the current size of the underlying data.
def set_size
@size = @data.size
end
def set_name name
- @name =
- if name.is_a?(Numeric) then name
+ @name =
+ if name.is_a?(Numeric) then name
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
elsif name then name # anything but Numeric or nil
else
nil
end
@@ -1353,6 +1373,6 @@
@missing_values[e] = 0
end
end
end
end
-end
\ No newline at end of file
+end