lib/daru/vector.rb in daru-0.0.4 vs lib/daru/vector.rb in daru-0.0.5

- old
+ new

@@ -12,46 +12,59 @@ include Daru::Maths::Arithmetic::Vector include Daru::Maths::Statistics::Vector include Daru::Plotting::Vector def each(&block) - @vector.each(&block) + return to_enum(:each) unless block_given? + + @data.each(&block) + self end def map!(&block) - @vector.map!(&block) + return to_enum(:map!) unless block_given? + @data.map!(&block) self end def map(&block) - Daru::Vector.new @vector.map(&block), name: @name, index: @index, dtype: @dtype + return to_enum(:map) unless block_given? + + Daru::Vector.new @data.map(&block), name: @name, index: @index, dtype: @dtype end alias_method :recode, :map attr_reader :name attr_reader :index attr_reader :size attr_reader :dtype + attr_reader :nm_dtype + attr_reader :nil_positions # Create a Vector object. # == Arguments # # @param source[Array,Hash] - Supply elements in the form of an Array or a Hash. If Array, a - # numeric index will be created if not supplied in the options. Specifying more - # index elements than actual values in *source* will insert *nil* into the - # surplus index elements. When a Hash is specified, the keys of the Hash are - # taken as the index elements and the corresponding values as the values that - # populate the vector. + # numeric index will be created if not supplied in the options. Specifying more + # index elements than actual values in *source* will insert *nil* into the + # surplus index elements. When a Hash is specified, the keys of the Hash are + # taken as the index elements and the corresponding values as the values that + # populate the vector. # # == Options # - # * +:name+ - Name of the vector + # * +:name+ - Name of the vector # - # * +:index+ - Index of the vector + # * +:index+ - Index of the vector # + # * +:dtype+ - The underlying data type. Can be :array or :nmatrix. Default :array. + # + # * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for + # further information on supported data type. + # # == Usage # # vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o] # vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4}) def initialize source, opts={} @@ -62,80 +75,116 @@ else index = opts[:index] source = source || [] end name = opts[:name] - @dtype = opts[:dtype] || Array - set_name name - @vector = - case - when @dtype == Array - Daru::Accessors::ArrayWrapper.new source.dup, self - when @dtype == NMatrix - Daru::Accessors::NMatrixWrapper.new source.dup, self - when @dtype == MDArray - Daru::Accessors::MDArrayWrapper.new source.dup - when @dtype == Range, Matrix - Daru::Accessors::ArrayWrapper.new source.to_a.dup, self + @data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype]) + @index = create_index(index || @data.size) + + if @index.size > @data.size + cast(dtype: :array) # NM with nils seg faults + (@index.size - @data.size).times { @data << nil } + elsif @index.size < @data.size + raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}" end - if index.nil? - @index = Daru::Index.new @vector.size - else - @index = index.to_index - end - # TODO: Will need work for NMatrix/MDArray - if @index.size > @vector.size - self.coerce Array # NM with nils seg faults - (@index.size - @vector.size).times { @vector << nil } - elsif @index.size < @vector.size - raise IndexError, "Expected index size >= vector size" - end - + @possibly_changed_type = true + set_nil_positions set_size end - # Get one or more elements with specified index. + # Get one or more elements with specified index or a range. # # == Usage + # # For vectors employing single layer Index + # # v[:one, :two] # => Daru::Vector with indexes :one and :two # v[:one] # => Single element - def [](index, *indexes) - if indexes.empty? - case index - when Range - # range into vector - # + # v[:one..:three] # => Daru::Vector with indexes :one, :two and :three + # + # # For vectors employing hierarchial multi index + # + def [](*indexes) + location = indexes[0] + if @index.is_a?(MultiIndex) + result = + if location.is_a?(Integer) + element_from_numeric_index(location) + elsif location.is_a?(Range) + arry = location.inject([]) do |memo, num| + memo << element_from_numeric_index(num) + memo + end + + new_index = Daru::MultiIndex.new(@index.to_a[location]) + Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype) else - if @index.include? index - @vector[@index[index]] - elsif index.is_a?(Numeric) - @vector[index] + sub_index = @index[indexes] + + if sub_index.is_a?(Integer) + element_from_numeric_index(sub_index) else - return nil + elements = sub_index.map do |tuple| + @data[@index[(indexes + tuple)]] + end + Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a), + name: @name, dtype: @dtype) end end + + return result else - indexes.unshift index + unless indexes[1] + case location + when Range + range = + if location.first.is_a?(Numeric) + location + else + first = location.first + last = location.last - Daru::Vector.new indexes.map { |index| @vector[@index[index]] },name: @name, - index: indexes + (first..last) + end + indexes = @index[range] + else + return element_from_numeric_index(location) + end + end + + Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name, + index: indexes.map { |e| named_index_for(e) }, dtype: @dtype end end - def []=(index, value) - @vector = @vector.coerce(Array) if value.nil? + def []=(*location, value) + cast(dtype: :array) if value.nil? and dtype != :array - if @index.include? index - @vector[@index[index]] = value + @possibly_changed_type = true if @type == :object and (value.nil? or + value.is_a?(Numeric)) + @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and + !value.nil?) + + pos = + if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer) + index_for location else - @vector[index] = value + index_for location[0] end + if pos.is_a?(MultiIndex) + pos.each do |sub_tuple| + self[*(location + sub_tuple)] = value + end + else + @data[pos] = value + end + set_size + set_nil_positions end # Two vectors are equal if the have the exact same index values corresponding # with the exact same elements. Name is ignored. def == other @@ -156,106 +205,239 @@ def push element concat element end - def re_index new_index - + def head q=10 + self[0..q] end + def tail q=10 + self[-q..-1] + end + # Append an element to the vector by specifying the element and index def concat element, index=nil raise IndexError, "Expected new unique index" if @index.include? index if index.nil? and @index.index_class == Integer - @index = Daru::Index.new @size+1 + @index = create_index(@size + 1) index = @size else begin - @index = @index.re_index(@index + index) - rescue Exception => e + @index = create_index(@index + index) + rescue StandardError => e raise e, "Expected valid index." end end - - @vector[@index[index]] = element - + @data[@index[index]] = element set_size + set_nil_positions end - def coerce dtype - begin - @vector = @vector.coerce @dtype - @dtype = dtype - rescue StandardError => e - puts "Cannot convert to #{dtype} because of data type mismatch. #{e}" - end + # Cast a vector to a new data type. + # + # == Options + # + # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix. + def cast opts={} + dtype = opts[:dtype] + raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless + dtype == :array or dtype == :nmatrix - self + @data = cast_vector_to dtype end # Delete an element by value def delete element self.delete_at index_of(element) end # Delete element by index def delete_at index idx = named_index_for index - @vector.delete_at @index[idx] + @data.delete_at @index[idx] if @index.index_class == Integer @index = Daru::Index.new @size-1 else - @index = (@index.to_a - [idx]).to_index + @index = Daru::Index.new (@index.to_a - [idx]) end set_size + set_nil_positions end + # The type of data contained in the vector. Can be :object or :numeric. If + # the underlying dtype is an NMatrix, this method will return the data type + # of the NMatrix object. + # + # Running through the data to figure out the kind of data is delayed to the + # last possible moment. + def type + return @data.nm_dtype if dtype == :nmatrix + + if @type.nil? or @possibly_changed_type + @type = :numeric + self.each do |e| + unless e.nil? + unless e.is_a?(Numeric) + @type = :object + break + end + end + end + @possibly_changed_type = false + end + + @type + end + # Get index of element def index_of element - @index.key @vector.index(element) + @index.key @data.index(element) end # Keep only unique elements of the vector alongwith their indexes. def uniq - uniq_vector = @vector.uniq + uniq_vector = @data.uniq new_index = uniq_vector.inject([]) do |acc, element| acc << index_of(element) acc end Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype end - # def sort ascending=true - # if ascending - - # end - # end + # Sorts a vector according to its values. If a block is specified, the contents + # will be evaluated and data will be swapped whenever the block evaluates + # to *true*. Defaults to ascending order sorting. Any missing values will be + # put at the end of the vector. Preserves indexing. Default sort algorithm is + # quick sort. + # + # == Options + # + # * +:ascending+ - if false, will sort in descending order. Defaults to true. + # + # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now. + # == Usage + # + # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"] + # # Say you want to sort these strings by length. + # v.sort { |a,b| a.length <=> b.length } + def sort opts={}, &block + opts = { + ascending: true, + type: :quick_sort + }.merge(opts) - def is_valid? value + block = lambda { |a,b| a <=> b } unless block + + order = opts[:ascending] ? :ascending : :descending + vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block) + index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index + + Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype) + end + + # Just sort the data and get an Array in return using Enumerable#sort. Non-destructive. + def sorted_data &block + @data.to_a.sort(&block) + end + + # Returns *true* if the value passed actually exists in the vector. + def exists? value !self[index_of(value)].nil? end + # Returns a vector which has *true* in the position where the element in self + # is nil, and false otherwise. + # + # == Usage + # + # v = Daru::Vector.new([1,2,4,nil]) + # v.is_nil? + # # => + # #<Daru::Vector:89421000 @name = nil @size = 4 > + # # nil + # # 0 false + # # 1 false + # # 2 false + # # 3 true + def is_nil? + nil_truth_vector = clone_structure + @index.each do |idx| + nil_truth_vector[idx] = self[idx].nil? ? true : false + end + + nil_truth_vector + end + + # Opposite of #is_nil? + def not_nil? + nil_truth_vector = clone_structure + @index.each do |idx| + nil_truth_vector[idx] = self[idx].nil? ? false : true + end + + nil_truth_vector + end + + # Replace all nils in the vector with the value passed as an argument. Destructive. + # See #replace_nils for non-destructive version + # + # == Arguments + # + # * +replacement+ - The value which should replace all nils + def replace_nils! replacement + nil_positions.each do |idx| + self[idx] = replacement + end + + self + end + + # Non-destructive version of #replace_nils! + def replace_nils replacement + self.dup.replace_nils!(replacement) + end + + def n_valid + @size + end + # Returns *true* if an index exists def has_index? index @index.include? index end + # Convert Vector to a horizontal or vertical Ruby Matrix. + # + # == Arguments + # + # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix. + def to_matrix axis=:horizontal + if axis == :horizontal + Matrix[to_a] + elsif axis == :vertical + Matrix.columns([to_a]) + else + raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}" + end + end + # Convert to hash. Hash keys are indexes and values are the correspoding elements def to_hash @index.inject({}) do |hsh, index| hsh[index] = self[index] hsh end end # Return an array def to_a - @vector.to_a + @data.to_a end # Convert the hash from to_hash to json def to_json *args self.to_hash.to_json @@ -281,56 +463,67 @@ def to_s to_html end # Over rides original inspect for pretty printing in irb - def inspect spacing=10, threshold=15 + def inspect spacing=20, threshold=15 longest = [@name.to_s.size, @index.to_a.map(&:to_s).map(&:size).max, - @vector .map(&:to_s).map(&:size).max, + @data .map(&:to_s).map(&:size).max, 'nil'.size].max content = "" longest = spacing if longest > spacing name = @name || 'nil' formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s" content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " + name.to_s + " @size = " + size.to_s + " >" content += sprintf formatter, "", name @index.each_with_index do |index, num| - content += sprintf formatter, index.to_s, (self[index] || 'nil').to_s + content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s if num > threshold content += sprintf formatter, '...', '...' break end end content += "\n" content end - # def compact! - # TODO: Compact and also take care of indexes - # @vector.compact! - # set_size - # end + # Create a new vector with a different index. + # + # @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq* + # will reindex with sequential numbers from 0 to (n-1). + def reindex new_index + index = create_index(new_index == :seq ? @size : new_index) + Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype + end # Give the vector a new name + # + # @param new_name [Symbol] The new name. def rename new_name @name = new_name.to_sym end # Duplicate elements and indexes def dup - Daru::Vector.new @vector.dup, name: @name, index: @index.dup + Daru::Vector.new @data.dup, name: @name, index: @index.dup end + # Copies the structure of the vector (i.e the index, size, etc.) and fills all + # all values with nils. + def clone_structure + Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup) + end + def daru_vector *name self end - alias_method :dv, :daru_vector + alias :dv :daru_vector def method_missing(name, *args, &block) if name.match(/(.+)\=/) self[name] = args[0] elsif has_index?(name) @@ -340,30 +533,141 @@ end end private + def quick_sort vector, index, order, &block + recursive_quick_sort vector, index, order, 0, @size-1, &block + [vector, index] + end + + def recursive_quick_sort vector, index, order, left_lower, right_upper, &block + if left_lower < right_upper + left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block) + if left_upper - left_lower < right_upper - right_lower + recursive_quick_sort(vector, index, order, left_lower, left_upper, &block) + recursive_quick_sort(vector, index, order, right_lower, right_upper, &block) + else + recursive_quick_sort(vector, index, order, right_lower, right_upper, &block) + recursive_quick_sort(vector, index, order, left_lower, left_upper, &block) + end + end + end + + def partition vector, index, order, left_lower, right_upper, &block + mindex = (left_lower + right_upper) / 2 + mvalue = vector[mindex] + i = left_lower + j = right_upper + opposite_order = order == :ascending ? :descending : :ascending + + i += 1 while(keep?(vector[i], mvalue, order, &block)) + j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block)) + + while i < j - 1 + vector[i], vector[j] = vector[j], vector[i] + index[i], index[j] = index[j], index[i] + i += 1 + j -= 1 + + i += 1 while(keep?(vector[i], mvalue, order, &block)) + j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block)) + end + + if i <= j + if i < j + vector[i], vector[j] = vector[j], vector[i] + index[i], index[j] = index[j], index[i] + end + i += 1 + j -= 1 + end + + [j,i] + end + + def keep? a, b, order, &block + return false if a.nil? or b.nil? + eval = block.call(a,b) + if order == :ascending + return true if eval == -1 + return false if eval == 1 + elsif order == :descending + return false if eval == -1 + return true if eval == 1 + end + return false + end + + # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the + # @dtype variable is set and the underlying data type of vector changed. + def cast_vector_to dtype, source=nil, nm_dtype=nil + source = @data if source.nil? + return @data if @dtype and @dtype == dtype + + new_vector = + case dtype + when :array then Daru::Accessors::ArrayWrapper.new(source.to_a.dup, self) + when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source.to_a.dup, + self, nm_dtype) + when :mdarray then raise NotImplementedError, "MDArray not yet supported." + else Daru::Accessors::ArrayWrapper.new(source.dup, self) + end + + @dtype = dtype || :array + new_vector + end + def named_index_for index if @index.include? index index elsif @index.key index @index.key index else raise IndexError, "Specified index #{index} does not exist." end end + def index_for index + if @index.include?(index) + @index[index] + elsif index.is_a?(Numeric) + index + end + end + def set_size - @size = @vector.size + @size = @data.size end def set_name name - if name.is_a?(Numeric) - @name = name - elsif name # anything but Numeric or nil - @name = name.to_sym + @name = + if name.is_a?(Numeric) then name + elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple + elsif name then name.to_sym # anything but Numeric or nil else - @name = nil + nil end + end + + def set_nil_positions + @nil_positions = [] + @index.each do |e| + @nil_positions << e if(self[e].nil?) + end + @nil_positions.uniq! + end + + def create_index potential_index + if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index) + potential_index + else + Daru::Index.new(potential_index) + end + end + + def element_from_numeric_index location + pos = index_for location + pos ? @data[pos] : nil end end end \ No newline at end of file