Sha256: ab1c866443f6c2d26113fe3e89b247a015f70dd707f17877e97937e215398151
Contents?: true
Size: 1.91 KB
Versions: 4
Compression:
Stored size: 1.91 KB
Contents
module IsoTree class Dataset attr_reader :numeric_columns, :categorical_columns, :array_type def initialize(data) @data = data if defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame) @vectors = data.vectors @numeric_columns, @categorical_columns = data.keys.partition { |k, v| ![:object, :bool].include?(data[k].type) } @array_type = false elsif defined?(Numo::NArray) && data.is_a?(Numo::NArray) raise ArgumentError, "Input must have 2 dimensions" if data.ndim != 2 data = data.cast_to(Numo::DFloat) ncols = data.shape[1] @numeric_columns = ncols.times.to_a @categorical_columns = [] @vectors = {} @numeric_columns.each do |k| @vectors[k] = data[true, k] end @array_type = true else data = data.to_a hashes = data.all? { |d| d.is_a?(Hash) } arrays = !hashes && data.all? { |d| d.is_a?(Array) } unless hashes || arrays raise ArgumentError, "Array elements must be all hashes or arrays" end ncols = data.first ? data.first.size : 0 if data.any? { |r| r.size != ncols } raise ArgumentError, "All rows must have the same number of columns" end keys = if hashes data.flat_map(&:keys).uniq else ncols.times.to_a end @vectors = {} keys.each do |k| @vectors[k] = [] end data.each do |d| keys.each do |k| @vectors[k] << d[k] end end @numeric_columns, @categorical_columns = keys.partition { |k| @vectors[k].all? { |v| v.nil? || v.is_a?(Numeric) } } @array_type = arrays end raise ArgumentError, "No data" if size == 0 end def [](k) @vectors[k] end def size @vectors.any? ? @vectors.values.first.size : 0 end end end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
isotree-0.4.0 | lib/isotree/dataset.rb |
isotree-0.3.1 | lib/isotree/dataset.rb |
isotree-0.3.0 | lib/isotree/dataset.rb |
isotree-0.2.2 | lib/isotree/dataset.rb |