lib/daru/vector.rb in daru-0.1.5 vs lib/daru/vector.rb in daru-0.1.6

- old
+ new

@@ -350,11 +350,11 @@ define_method(method) do |other| mod = Daru::Core::Query if other.is_a?(Daru::Vector) mod.apply_vector_operator operator, self, other else - mod.apply_scalar_operator operator, @data,other + mod.apply_scalar_operator operator, @data, other end end alias_method operator, method if operator != :== && operator != :!= end alias :gt :mt @@ -460,10 +460,30 @@ # # => true def include_values?(*values) values.any? { |v| include_with_nan? @data, v } end + # @note Do not use it to check for Float::NAN as + # Float::NAN == Float::NAN is false + # Return vector of booleans with value at ith position is either + # true or false depending upon whether value at position i is equal to + # any of the values passed in the argument or not + # @param [Array] *values values to equate with + # @return [Daru::Vector] vector of boolean values + # @example + # dv = Daru::Vector.new [1, 2, 3, 2, 1] + # dv.is_values 1, 2 + # # => #<Daru::Vector(5)> + # # 0 true + # # 1 true + # # 2 false + # # 3 true + # # 4 true + def is_values(*values) + Daru::Vector.new values.map { |v| eq(v) }.inject(:|) + end + # Append an element to the vector by specifying the element and index def concat element, index raise IndexError, 'Expected new unique index' if @index.include? index @index |= [index] @@ -479,12 +499,11 @@ # == Options # # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix. def cast opts={} dt = opts[:dtype] - raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless - dt == :array || dt == :nmatrix || dt == :gsl + raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless %i[array nmatrix gsl].include?(dt) @data = cast_vector_to dt unless @dtype == dt end # Delete an element by value @@ -533,11 +552,11 @@ end # Get index of element def index_of element case dtype - when :array then @index.key @data.index { |x| x.eql? element } + when :array then @index.key(@data.index { |x| x.eql? element }) else @index.key @data.index(element) end end # Keep only unique elements of the vector alongwith their indexes. @@ -581,10 +600,35 @@ index = @index.reorder index Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype) end + # Sorts the vector according to it's`Index` values. Defaults to ascending + # order sorting. + # + # @param [Hash] opts the options for sort_by_index method. + # @option opts [Boolean] :ascending false, will sort `index` in + # descending order. + # + # @return [Vector] new sorted `Vector` according to the index values. + # + # @example + # + # dv = Daru::Vector.new [11, 13, 12], index: [23, 21, 22] + # # Say you want to sort index in ascending order + # dv.sort_by_index(ascending: true) + # #=> Daru::Vector.new [13, 12, 11], index: [21, 22, 23] + # # Say you want to sort index in descending order + # dv.sort_by_index(ascending: false) + # #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21] + def sort_by_index opts={} + opts = {ascending: true}.merge(opts) + _, new_order = resort_index(@index.each_with_index, opts).transpose + + reorder new_order + end + DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)| case when lv.nil? && rv.nil? li <=> ri when lv.nil? @@ -622,11 +666,11 @@ # Delete an element if block returns true. Destructive. def delete_if return to_enum(:delete_if) unless block_given? - keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose + keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose @data = cast_vector_to @dtype, keep_e @index = Daru::Index.new(keep_i) update_position_cache @@ -700,35 +744,10 @@ def reset_index! @index = Daru::Index.new(Array.new(size) { |i| i }) self end - # Returns a vector which has *true* in the position where the element in self - # is nil, and false otherwise. - # - # == Usage - # - # v = Daru::Vector.new([1,2,4,nil]) - # v.is_nil? - # # => - # #<Daru::Vector:89421000 @name = nil @size = 4 > - # # nil - # # 0 false - # # 1 false - # # 2 false - # # 3 true - # - def is_nil? - # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18 - recode(&:nil?) - end - - # Opposite of #is_nil? - def not_nil? - recode { |v| !v.nil? } - end - # Replace all nils in the vector with the value passed as an argument. Destructive. # See #replace_nils for non-destructive version # # == Arguments # @@ -739,31 +758,47 @@ end self end - # Lags the series by k periods. + # Lags the series by `k` periods. # - # The convention is to set the oldest observations (the first ones - # in the series) to nil so that the size of the lagged series is the - # same as the original. + # Lags the series by `k` periods, "shifting" data and inserting `nil`s + # from beginning or end of a vector, while preserving original vector's + # size. # - # Usage: + # `k` can be positive or negative integer. If `k` is positive, `nil`s + # are inserted at the beginning of the vector, otherwise they are + # inserted at the end. # - # ts = Daru::Vector.new((1..10).map { rand }) - # # => [0.69, 0.23, 0.44, 0.71, ...] + # @param [Integer] k "shift" the series by `k` periods. `k` can be + # positive or negative. (default = 1) # - # ts.lag # => [nil, 0.69, 0.23, 0.44, ...] - # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...] + # @return [Daru::Vector] a new vector with "shifted" inital values + # and `nil` values inserted. The return vector is the same length + # as the orignal vector. + # + # @example Lag a vector with different periods `k` + # + # ts = Daru::Vector.new(1..5) + # # => [1, 2, 3, 4, 5] + # + # ts.lag # => [nil, 1, 2, 3, 4] + # ts.lag(1) # => [nil, 1, 2, 3, 4] + # ts.lag(2) # => [nil, nil, 1, 2, 3] + # ts.lag(-1) # => [2, 3, 4, 5, nil] + # def lag k=1 - return dup if k.zero? - - dat = @data.to_a.dup - (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] } - (0...k).each { |i| dat[i] = nil } - - Daru::Vector.new(dat, index: @index, name: @name) + case k + when 0 then dup + when 1...size + copy([nil] * k + data.to_a) + when -size..-1 + copy(data.to_a[k.abs...size]) + else + copy([]) + end end def detach_index Daru::DataFrame.new( index: @index.to_a, @@ -869,64 +904,112 @@ def to_json(*) to_h.to_json end # Convert to html for iruby - def to_html threshold=30 + def to_html(threshold=30) + table_thead = to_html_thead + table_tbody = to_html_tbody(threshold) path = if index.is_a?(MultiIndex) File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__) else File.expand_path('../iruby/templates/vector.html.erb', __FILE__) end ERB.new(File.read(path).strip).result(binding) end + def to_html_thead + table_thead_path = + if index.is_a?(MultiIndex) + File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__) + else + File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__) + end + ERB.new(File.read(table_thead_path).strip).result(binding) + end + + def to_html_tbody(threshold=30) + table_tbody_path = + if index.is_a?(MultiIndex) + File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__) + else + File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__) + end + ERB.new(File.read(table_tbody_path).strip).result(binding) + end + def to_s - to_html + "#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>" end - # Create a summary of the Vector using Report Builder. - def summary(method=:to_text) - ReportBuilder.new(no_title: true).add(self).send(method) + # Create a summary of the Vector + # @params [Fixnum] indent_level + # @return [String] String containing the summary of the Vector + # @example + # dv = Daru::Vector.new [1, 2, 3] + # puts dv.summary + # + # # = + # # n :3 + # # non-missing:3 + # # median: 2 + # # mean: 2.0000 + # # std.dev.: 1.0000 + # # std.err.: 0.5774 + # # skew: 0.0000 + # # kurtosis: -2.3333 + def summary(indent_level=0) + non_missing = size - count_values(*Daru::MISSING_VALUES) + summary = ' =' * indent_level + "= #{name}" \ + "\n n :#{size}" \ + "\n non-missing:#{non_missing}" + case type + when :object + summary << object_summary + when :numeric + summary << numeric_summary + end + summary.split("\n").join("\n" + ' ' * indent_level) end - # :nocov: - def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength - b.section(name: name) do |s| - s.text "n :#{size}" - s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}" - if @type == :object - s.text "factors: #{factors.to_a.join(',')}" - s.text "mode: #{mode}" + # Displays summary for an object type Vector + # @return [String] String containing object vector summary + def object_summary + nval = count_values(*Daru::MISSING_VALUES) + summary = "\n factors: #{factors.to_a.join(',')}" \ + "\n mode: #{mode.to_a.join(',')}" \ + "\n Distribution\n" - s.table(name: 'Distribution') do |t| - frequencies.sort_by(&:to_s).each do |k,v| - key = @index.include?(k) ? @index[k] : k - t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))] - end - end - end + data = frequencies.sort.each_with_index.map do |v, k| + [k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)] + end - s.text "median: #{median}" if @type==:numeric || @type==:numeric - if @type==:numeric - s.text 'mean: %0.4f' % mean - if sd - s.text 'std.dev.: %0.4f' % sd - s.text 'std.err.: %0.4f' % se - s.text 'skew: %0.4f' % skew - s.text 'kurtosis: %0.4f' % kurtosis - end - end + summary + Formatters::Table.format(data) + end + + # Displays summary for an numeric type Vector + # @return [String] String containing numeric vector summary + def numeric_summary + summary = "\n median: #{median}" + + "\n mean: %0.4f" % mean + if sd + summary << "\n std.dev.: %0.4f" % sd + + "\n std.err.: %0.4f" % se end + + if count_values(*Daru::MISSING_VALUES).zero? + summary << "\n skew: %0.4f" % skew + + "\n kurtosis: %0.4f" % kurtosis + end + summary end - # :nocov: # Over rides original inspect for pretty printing in irb def inspect spacing=20, threshold=15 row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a - "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" + + "#<#{self.class}(#{size})#{':category' if category?}>\n" + Formatters::Table.format( to_a.lazy.map { |v| [v] }, headers: @name && [@name], row_headers: row_headers, threshold: threshold, @@ -1334,10 +1417,16 @@ to_df.group_by(*args) end private + def copy(values) + # Make sure values is right-justified to the size of the vector + values.concat([nil] * (size-values.size)) if values.size < size + Daru::Vector.new(values[0...size], index: @index, name: @name) + end + def nil_positions @nil_positions || @nil_positions = size.times.select { |i| @data[i].nil? } end @@ -1424,10 +1513,10 @@ when :gsl then Daru::Accessors::GSLWrapper.new(source, self) when :mdarray then raise NotImplementedError, 'MDArray not yet supported.' else raise ArgumentError, "Unknown dtype #{dtype}" end - @dtype = dtype || :array + @dtype = dtype new_vector end def set_name name # rubocop:disable Style/AccessorMethodName @name =