Sha256: 1dfdeb4a2736a57916b6780ad1b968513fd8fea3e812c9a4cdbc42d692f732ab

Contents?: true

Size: 1.94 KB

Versions: 5

Compression:

Stored size: 1.94 KB

Contents

module IsoTree
  # Column-oriented wrapper around the input formats accepted here.
  #
  # Three kinds of input are recognised:
  # * a Rover::DataFrame (only when Rover is loaded),
  # * a 2-dimensional Numo::NArray (only when Numo is loaded),
  # * anything whose +to_a+ yields rows that are all hashes or all arrays.
  #
  # Whatever the input, columns are split into numeric and categorical ones
  # and each column is reachable through #[].
  class Dataset
    # numeric_columns     - keys of the columns treated as numeric
    # categorical_columns - keys of the remaining columns
    # array_type          - true when columns are addressed by integer
    #                       position (Numo arrays, array rows), false when
    #                       they are addressed by name (data frames, hash rows)
    attr_reader :numeric_columns, :categorical_columns, :array_type

    # Builds the dataset from +data+.
    #
    # Raises ArgumentError when the input is not in a supported format, when
    # a Numo array is not 2-dimensional, when rows are neither all hashes nor
    # all arrays, when rows differ in size, or when there is no data at all.
    def initialize(data)
      @data = data

      if defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
        load_data_frame(data)
      elsif defined?(Numo::NArray) && data.is_a?(Numo::NArray)
        load_narray(data)
      else
        load_rows(data)
      end

      raise ArgumentError, "No data" if size == 0
    end

    # Returns the vector stored for column +k+ (nil for an unknown column).
    def [](k)
      @vectors[k]
    end

    # Number of rows, taken from the first column; 0 when there are no columns.
    def size
      @vectors.any? ? @vectors.values.first.size : 0
    end

    private

    # Loads a Rover::DataFrame: columns typed :object or :bool are
    # categorical, every other column is numeric.
    def load_data_frame(frame)
      @vectors = frame.vectors
      @numeric_columns, @categorical_columns =
        frame.keys.partition { |k| ![:object, :bool].include?(frame[k].type) }
      @array_type = false
    end

    # Loads a 2-dimensional Numo::NArray: the input is cast to Numo::DFloat
    # and every column is numeric, keyed by its integer position.
    def load_narray(narray)
      raise ArgumentError, "Input must have 2 dimensions" if narray.ndim != 2

      matrix = narray.cast_to(Numo::DFloat)

      @numeric_columns = matrix.shape[1].times.to_a
      @categorical_columns = []
      @vectors = @numeric_columns.each_with_object({}) do |k, vectors|
        vectors[k] = matrix[true, k]
      end
      @array_type = true
    end

    # Loads row-oriented data (rows that are all hashes or all arrays).
    def load_rows(data)
      # Reject inputs such as strings or integers up front so that every
      # invalid input surfaces as ArgumentError rather than NoMethodError.
      unless data.respond_to?(:to_a)
        raise ArgumentError, "Input must be a data frame, a 2-dimensional array, or an array of hashes or arrays"
      end

      rows = data.to_a

      hashes = rows.all? { |r| r.is_a?(Hash) }
      arrays = !hashes && rows.all? { |r| r.is_a?(Array) }
      unless hashes || arrays
        raise ArgumentError, "Array elements must be all hashes or arrays"
      end

      ncols = rows.first ? rows.first.size : 0
      if rows.any? { |r| r.size != ncols }
        raise ArgumentError, "All rows must have the same number of columns"
      end

      # Hash rows are keyed by the union of their keys (a key missing from a
      # row reads as nil); array rows are keyed by position.
      keys = hashes ? rows.flat_map(&:keys).uniq : ncols.times.to_a

      @vectors = keys.each_with_object({}) do |k, vectors|
        vectors[k] = rows.map { |r| r[k] }
      end

      # A column is numeric when every value is nil or Numeric.
      @numeric_columns, @categorical_columns =
        keys.partition { |k| @vectors[k].all? { |v| v.nil? || v.is_a?(Numeric) } }
      @array_type = arrays
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
isotree-0.2.1 lib/isotree/dataset.rb
isotree-0.2.0 lib/isotree/dataset.rb
isotree-0.1.5 lib/isotree/dataset.rb
isotree-0.1.4 lib/isotree/dataset.rb
isotree-0.1.3 lib/isotree/dataset.rb