lib/daru/vector.rb in daru-0.1.1 vs lib/daru/vector.rb in daru-0.1.2

- old
+ new

@@ -14,11 +14,11 @@ include Daru::Maths::Statistics::Vector include Daru::Plotting::Vector if Daru.has_nyaplot? def each(&block) return to_enum(:each) unless block_given? - + @data.each(&block) self end def each_index(&block) @@ -59,37 +59,37 @@ # Store a hash of labels for values. Supplementary only. Recommend using index # for proper usage. attr_accessor :labels # Create a Vector object. - # + # # == Arguments - # - # @param source[Array,Hash] - Supply elements in the form of an Array or a - # Hash. If Array, a numeric index will be created if not supplied in the - # options. Specifying more index elements than actual values in *source* - # will insert *nil* into the surplus index elements. When a Hash is specified, - # the keys of the Hash are taken as the index elements and the corresponding + # + # @param source[Array,Hash] - Supply elements in the form of an Array or a + # Hash. If Array, a numeric index will be created if not supplied in the + # options. Specifying more index elements than actual values in *source* + # will insert *nil* into the surplus index elements. When a Hash is specified, + # the keys of the Hash are taken as the index elements and the corresponding # values as the values that populate the vector. - # + # # == Options - # + # # * +:name+ - Name of the vector - # + # # * +:index+ - Index of the vector - # - # * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl. + # + # * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl. # Default :array. - # + # # * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for # further information on supported data type. - # + # # * +:missing_values+ - An Array of the values that are to be treated as 'missing'. # nil is the default missing value. - # + # # == Usage - # + # # vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o] # vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4}) def initialize source, opts={} index = nil if source.is_a?(Hash) @@ -102,11 +102,11 @@ name = opts[:name] set_name name @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype]) @index = try_create_index(index || @data.size) - + if @index.size > @data.size cast(dtype: :array) # NM with nils seg faults (@index.size - @data.size).times { @data << nil } elsif @index.size < @data.size raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}" @@ -118,15 +118,15 @@ set_size end # Create a new vector by specifying the size and an optional value # and block to generate values. - # + # # == Description # # The *new_with_size* class method lets you create a Daru::Vector - # by specifying the size as the argument. The optional block, if + # by specifying the size as the argument. The optional block, if # supplied, is run once for populating each element in the Vector. # # The result of each run of the block is the value that is ultimately # assigned to that position in the Vector. # @@ -147,19 +147,19 @@ # Create a vector using (almost) any object # * Array: flattened # * Range: transformed using to_a # * Daru::Vector # * Numeric and string values - # + # # == Description # # The `Vector.[]` class method creates a vector from almost any # object that has a `#to_a` method defined on it. It is similar # to R's `c` method. - # + # # == Usage - # + # # a = Daru::Vector[1,2,3,4,6..10] # #=> # # <Daru::Vector:99448510 @name = nil @size = 9 > # # nil # # 0 1 @@ -187,25 +187,25 @@ end Daru::Vector.new(values) end # Get one or more elements with specified index or a range. - # + # # == Usage # # For vectors employing single layer Index - # + # # v[:one, :two] # => Daru::Vector with indexes :one and :two # v[:one] # => Single element # v[:one..:three] # => Daru::Vector with indexes :one, :two and :three - # + # # # For vectors employing hierarchial multi index - # + # def [](*indexes) location = indexes[0] if @index.is_a?(MultiIndex) sub_index = @index[indexes] - result = + result = if sub_index.is_a?(Integer) @data[sub_index] else elements = sub_index.map do |tuple| @data[@index[tuple]] @@ -218,10 +218,13 @@ elements, index: sub_index, name: @name, dtype: @dtype) end return result else + raise TypeError, "Invalid index type #{location.inspect}.\ + \nUsage: v[:a, :b] gives elements with keys :a and :b for vector v." if location.is_a? Array + unless indexes[1] case location when Range first = location.first last = location.last @@ -236,64 +239,72 @@ end else indexes = indexes.map { |e| named_index_for(e) } end - Daru::Vector.new( - indexes.map { |loc| @data[@index[loc]] }, - name: @name, index: indexes, dtype: @dtype) + begin + Daru::Vector.new( + indexes.map { |loc| @data[@index[loc]] }, + name: @name, index: indexes, dtype: @dtype) + rescue NoMethodError + raise IndexError, "Specified index #{pos.inspect} does not exist." + end end end # Just like in Hashes, you can specify the index label of the Daru::Vector # and assign an element an that place in the Daru::Vector. - # + # # == Usage - # + # # v = Daru::Vector.new([1,2,3], index: [:a, :b, :c]) # v[:a] = 999 - # #=> + # #=> # ##<Daru::Vector:90257920 @name = nil @size = 3 > # # nil # # a 999 # # b 2 # # c 3 def []=(*location, value) cast(dtype: :array) if value.nil? and dtype != :array - @possibly_changed_type = true if @type == :object and (value.nil? or + @possibly_changed_type = true if @type == :object and (value.nil? or value.is_a?(Numeric)) @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and !value.nil?) location = location[0] unless @index.is_a?(MultiIndex) pos = @index[location] if pos.is_a?(Numeric) @data[pos] = value else - pos.each { |tuple| self[tuple] = value } + begin + pos.each { |tuple| self[tuple] = value } + rescue NoMethodError + raise IndexError, "Specified index #{pos.inspect} does not exist." + end end set_size set_missing_positions unless Daru.lazy_update end # The values to be treated as 'missing'. *nil* is the default missing # type. To set missing values see the missing_values= method. def missing_values - @missing_values.keys + @missing_values.keys end # Assign an Array to treat certain values as 'missing'. - # + # # == Usage - # + # # v = Daru::Vector.new [1,2,3,4,5] # v.missing_values = [3] # v.update - # v.missing_positions + # v.missing_positions # #=> [2] def missing_values= values set_missing_values values set_missing_positions unless Daru.lazy_update end @@ -368,25 +379,25 @@ define_method(method) do |other| mod = Daru::Core::Query if other.is_a?(Daru::Vector) mod.apply_vector_operator operator, self, other else - mod.apply_scalar_operator operator, @data,other + mod.apply_scalar_operator operator, @data,other end end end alias :gt :mt - alias :gteq :mteq + alias :gteq :mteq # Comparator for checking if any of the elements in *other* exist in self. # # @param [Array, Daru::Vector] other A collection which has elements that # need to be checked for in self. # @example Usage of `in`. # vector = Daru::Vector.new([1,2,3,4,5]) # vector.where(vector.in([3,5])) - # #=> + # #=> # ##<Daru::Vector:82215960 @name = nil @size = 2 > # # nil # # 2 3 # # 4 5 def in other @@ -398,36 +409,36 @@ end ) end # Return a new vector based on the contents of a boolean array. Use with the - # comparator methods to obtain meaningful results. See this notebook for + # comparator methods to obtain meaningful results. See this notebook for # a good overview of using #where. # - # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The + # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The # collection containing the true of false values. Each element in the Vector # corresponding to a `true` in the bool_arry will be returned alongwith it's # index. # @exmaple Usage of #where. # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21]) # # # Simple logic statement passed to #where. # vector.where(vector.eq(5).or(vector.eq(1))) - # # => + # # => # ##<Daru::Vector:77626210 @name = nil @size = 7 > # # nil # # 2 5 # # 4 5 # # 7 5 # # 10 1 # # 11 5 # # 13 5 # # 15 1 - # + # # # A somewhat more complex logic statement # vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1])) - # #=> + # #=> # ##<Daru::Vector:81072310 @name = nil @size = 7 > # # nil # # 2 5 # # 4 5 # # 7 5 @@ -462,29 +473,29 @@ @data[@index[index]] = element set_size set_missing_positions unless Daru.lazy_update end - alias :push :concat + alias :push :concat alias :<< :concat # Cast a vector to a new data type. - # + # # == Options - # + # # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix. def cast opts={} dt = opts[:dtype] - raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless + raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless dt == :array or dt == :nmatrix or dt == :gsl @data = cast_vector_to dt unless @dtype == dt end # Delete an element by value def delete element - self.delete_at index_of(element) + self.delete_at index_of(element) end # Delete element by index def delete_at index @data.delete_at @index[index] @@ -495,13 +506,13 @@ end # The type of data contained in the vector. Can be :object or :numeric. If # the underlying dtype is an NMatrix, this method will return the data type # of the NMatrix object. - # + # # Running through the data to figure out the kind of data is delayed to the - # last possible moment. + # last possible moment. def type return @data.nm_dtype if dtype == :nmatrix if @type.nil? or @possibly_changed_type @type = :numeric @@ -525,12 +536,12 @@ end # Keep only unique elements of the vector alongwith their indexes. def uniq uniq_vector = @data.uniq - new_index = uniq_vector.inject([]) do |acc, element| - acc << index_of(element) + new_index = uniq_vector.inject([]) do |acc, element| + acc << index_of(element) acc end Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype end @@ -542,41 +553,51 @@ def all? &block @data.data.all?(&block) end # Sorts a vector according to its values. If a block is specified, the contents - # will be evaluated and data will be swapped whenever the block evaluates + # will be evaluated and data will be swapped whenever the block evaluates # to *true*. Defaults to ascending order sorting. Any missing values will be # put at the end of the vector. Preserves indexing. Default sort algorithm is # quick sort. - # + # # == Options - # + # # * +:ascending+ - if false, will sort in descending order. Defaults to true. - # + # # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now. # == Usage - # + # # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"] # # Say you want to sort these strings by length. # v.sort(ascending: false) { |a,b| a.length <=> b.length } def sort opts={}, &block opts = { ascending: true, type: :quick_sort }.merge(opts) - block = lambda { |a,b| a <=> b } unless block - + block = lambda { |a,b| + return a <=> b if !(a.nil? || b.nil?) + + if a.nil? && b.nil? + 0 + elsif a.nil? + -1 + else + 1 + end + } unless block + order = opts[:ascending] ? :ascending : :descending vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block) index = Daru::Index.new index Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype) end - # Just sort the data and get an Array in return using Enumerable#sort. + # Just sort the data and get an Array in return using Enumerable#sort. # Non-destructive. def sorted_data &block @data.to_a.sort(&block) end @@ -726,16 +747,16 @@ self end # Returns a vector which has *true* in the position where the element in self # is nil, and false otherwise. - # + # # == Usage - # + # # v = Daru::Vector.new([1,2,4,nil]) # v.is_nil? - # # => + # # => # #<Daru::Vector:89421000 @name = nil @size = 4 > # # nil # # 0 false # # 1 false # # 2 false @@ -759,13 +780,13 @@ nil_truth_vector end # Replace all nils in the vector with the value passed as an argument. Destructive. # See #replace_nils for non-destructive version - # + # # == Arguments - # + # # * +replacement+ - The value which should replace all nils def replace_nils! replacement missing_positions.each do |idx| self[idx] = replacement end @@ -817,13 +838,13 @@ def has_index? index @index.include? index end # Convert Vector to a horizontal or vertical Ruby Matrix. - # + # # == Arguments - # + # # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix. def to_matrix axis=:horizontal if axis == :horizontal Matrix[to_a] elsif axis == :vertical @@ -859,32 +880,32 @@ def to_a @data.to_a end # Convert the hash from to_hash to json - def to_json *args + def to_json *args self.to_hash.to_json end # Convert to html for iruby def to_html threshold=30 name = @name || 'nil' - html = "<table>" + + html = "<table>" + "<tr>" + - "<th colspan=\"2\">" + - "Daru::Vector:#{self.object_id} " + " size: #{size}" + + "<th colspan=\"2\">" + + "Daru::Vector:#{self.object_id} " + " size: #{size}" + "</th>" + "</tr>" html += '<tr><th> </th><th>' + name.to_s + '</th></tr>' @index.each_with_index do |index, num| html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>' - + if num > threshold html += '<tr><td>...</td><td>...</td></tr>' last_index = @index.to_a.last - html += '<tr>' + + html += '<tr>' + '<td>' + last_index.to_s + '</td>' + '<td>' + self[last_index].to_s + '</td>' + '</tr>' break end @@ -933,11 +954,11 @@ end # Over rides original inspect for pretty printing in irb def inspect spacing=20, threshold=15 longest = [@name.to_s.size, - (@index.to_a.map(&:to_s).map(&:size).max || 0), + (@index.to_a.map(&:to_s).map(&:size).max || 0), (@data .map(&:to_s).map(&:size).max || 0), 'nil'.size].max content = "" longest = spacing if longest > spacing @@ -973,34 +994,34 @@ vector end def index= idx - raise ArgumentError, - "Size of supplied index #{index.size} does not match size of DataFrame" if + raise ArgumentError, + "Size of supplied index #{index.size} does not match size of DataFrame" if idx.size != self.size - raise ArgumentError, "Can only assign type Index and its subclasses." unless + raise ArgumentError, "Can only assign type Index and its subclasses." unless idx.kind_of?(Daru::Index) - + @index = idx self end # Give the vector a new name - # + # # @param new_name [Symbol] The new name. def rename new_name if new_name.is_a?(Numeric) - @name = new_name + @name = new_name return end - + @name = new_name end # Duplicate elements and indexes - def dup + def dup Daru::Vector.new @data.dup, name: @name, index: @index.dup end # == Bootstrap # Generate +nr+ resamples (with replacement) of size +s+ @@ -1080,19 +1101,19 @@ end Daru::DataFrame.new ps end # Creates a new vector consisting only of non-nil data - # + # # == Arguments - # + # # @as_a [Symbol] Passing :array will return only the elements # as an Array. Otherwise will return a Daru::Vector. - # + # # @duplicate [Symbol] In case no missing data is found in the # vector, setting this to false will return the same vector. - # Otherwise, a duplicate will be returned irrespective of + # Otherwise, a duplicate will be returned irrespective of # presence of missing data. def only_valid as_a=:vector, duplicate=true return self.dup if !has_missing_data? and as_a == :vector and duplicate return self if !has_missing_data? and as_a == :vector and !duplicate return self.to_a if !has_missing_data? and as_a != :vector @@ -1101,11 +1122,11 @@ new_vector = new_index.map do |idx| self[idx] end return new_vector if as_a != :vector - + Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype end # Returns a Vector containing only missing data (preserves indexes). def only_missing as_a=:vector @@ -1147,30 +1168,30 @@ def clone_structure Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup) end # Save the vector to a file - # + # # == Arguments - # + # # * filename - Path of file where the vector is to be saved def save filename Daru::IO.save self, filename end def _dump(depth) # :nodoc: Marshal.dump({ - data: @data.to_a, - dtype: @dtype, - name: @name, + data: @data.to_a, + dtype: @dtype, + name: @name, index: @index, missing_values: @missing_values}) end def self._load(data) # :nodoc: h = Marshal.load(data) - Daru::Vector.new(h[:data], index: h[:index], + Daru::Vector.new(h[:data], index: h[:index], name: h[:name], dtype: h[:dtype], missing_values: h[:missing_values]) end def daru_vector *name self @@ -1259,13 +1280,12 @@ [j,i] end def keep? a, b, order, &block - return false if a.nil? or b.nil? eval = block.call(a,b) - if order == :ascending + if order == :ascending return true if eval == -1 return false if eval == 1 elsif order == :descending return false if eval == -1 return true if eval == 1 @@ -1276,11 +1296,11 @@ # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the # @dtype variable is set and the underlying data type of vector changed. def cast_vector_to dtype, source=nil, nm_dtype=nil source = @data.to_a if source.nil? - new_vector = + new_vector = case dtype when :array then Daru::Accessors::ArrayWrapper.new(source, self) when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype) when :gsl then Daru::Accessors::GSLWrapper.new(source, self) when :mdarray then raise NotImplementedError, "MDArray not yet supported." @@ -1312,12 +1332,12 @@ def set_size @size = @data.size end def set_name name - @name = - if name.is_a?(Numeric) then name + @name = + if name.is_a?(Numeric) then name elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple elsif name then name # anything but Numeric or nil else nil end @@ -1353,6 +1373,6 @@ @missing_values[e] = 0 end end end end -end \ No newline at end of file +end