Sha256: 1dfdeb4a2736a57916b6780ad1b968513fd8fea3e812c9a4cdbc42d692f732ab
Contents?: true
Size: 1.94 KB
Versions: 5
Compression:
Stored size: 1.94 KB
Contents
module IsoTree
  # Column-oriented wrapper around the input formats accepted by the library.
  #
  # Whatever the input is, it ends up as a hash of column key => column values
  # (+@vectors+), with every column key classified as numeric or categorical.
  class Dataset
    # @return [Array] keys of the columns classified as numeric
    # @return [Array] keys of the columns classified as categorical
    # @return [Boolean] true when columns are addressed by position
    #   (Numo array or array of arrays) rather than by name
    attr_reader :numeric_columns, :categorical_columns, :array_type

    # @param data [Object] a Rover::DataFrame, a 2-dimensional Numo::NArray,
    #   or anything responding to +to_a+ that yields rows which are all
    #   hashes or all arrays
    # @raise [ArgumentError] when the input has an unsupported shape or type,
    #   or contains no rows/columns
    def initialize(data)
      @data = data

      # The optional libraries are only consulted when they are loaded.
      if defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
        load_data_frame(data)
      elsif defined?(Numo::NArray) && data.is_a?(Numo::NArray)
        load_narray(data)
      else
        load_rows(data)
      end

      raise ArgumentError, "No data" if size.zero?
    end

    # @param k [Object] column key (name or positional index)
    # @return [Object, nil] the values stored for column +k+, or nil if absent
    def [](k)
      @vectors[k]
    end

    # @return [Integer] number of rows, taken from the first column (0 when
    #   there are no columns)
    def size
      @vectors.empty? ? 0 : @vectors.each_value.first.size
    end

    private

    # Uses the data frame's own vectors; columns typed :object or :bool are
    # categorical, every other type is numeric.
    def load_data_frame(data)
      @vectors = data.vectors
      @numeric_columns, @categorical_columns =
        data.keys.partition { |k| ![:object, :bool].include?(data[k].type) }
      @array_type = false
    end

    # Casts a 2-dimensional Numo array to DFloat and exposes each column by
    # its positional index; all columns are numeric.
    def load_narray(data)
      raise ArgumentError, "Input must have 2 dimensions" if data.ndim != 2

      data = data.cast_to(Numo::DFloat)
      @numeric_columns = data.shape[1].times.to_a
      @categorical_columns = []
      @vectors = @numeric_columns.each_with_object({}) do |k, vectors|
        vectors[k] = data[true, k]
      end
      @array_type = true
    end

    # Transposes row-wise input (all hashes or all arrays) into columns.
    # A column is numeric when every value is nil or a Numeric.
    def load_rows(data)
      # Fail with the same error class as every other invalid input instead
      # of leaking a NoMethodError from the +to_a+ call below.
      unless data.respond_to?(:to_a)
        raise ArgumentError, "Input must be an array of hashes or arrays"
      end

      rows = data.to_a
      hashes = rows.all? { |row| row.is_a?(Hash) }
      arrays = !hashes && rows.all? { |row| row.is_a?(Array) }
      unless hashes || arrays
        raise ArgumentError, "Array elements must be all hashes or arrays"
      end

      ncols = rows.empty? ? 0 : rows.first.size
      if rows.any? { |row| row.size != ncols }
        raise ArgumentError, "All rows must have the same number of columns"
      end

      # Hash rows are keyed by the union of their keys (first-seen order);
      # array rows are keyed by position.
      keys = hashes ? rows.flat_map(&:keys).uniq : ncols.times.to_a

      @vectors = keys.each_with_object({}) do |k, vectors|
        vectors[k] = rows.map { |row| row[k] }
      end

      @numeric_columns, @categorical_columns = keys.partition do |k|
        @vectors[k].all? { |v| v.nil? || v.is_a?(Numeric) }
      end
      @array_type = arrays
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygem
Version | Path |
---|---|
isotree-0.2.1 | lib/isotree/dataset.rb |
isotree-0.2.0 | lib/isotree/dataset.rb |
isotree-0.1.5 | lib/isotree/dataset.rb |
isotree-0.1.4 | lib/isotree/dataset.rb |
isotree-0.1.3 | lib/isotree/dataset.rb |