Sha256: 4919c34bd9fcfb36d6dcf5a69d459640eb4c399a73ba4890bbfc5c05317d39ef

Contents?: true

Size: 895 Bytes

Versions: 4

Compression:

Stored size: 895 Bytes

Contents

module EasyML::Data
  class Datasource
    # Datasource that wraps a dataframe already held in memory instead of
    # reading from files.
    #
    # NOTE(review): `attribute`, `validate` and `errors` are not defined in
    # this file; presumably they are supplied by GlueGun::DSL or by the
    # Datasource superclass -- confirm.
    class PolarsDatasource < Datasource
      include GlueGun::DSL

      # Set to the construction time by #initialize; nothing else in this
      # class updates it.
      attr_accessor :last_updated_at

      # The wrapped dataframe. The validation below accepts nil as well as a
      # Polars::DataFrame.
      attribute :df
      validate :df_is_dataframe

      # Validation callback: records an error on :df unless it is nil or a
      # Polars::DataFrame.
      def df_is_dataframe
        return if df.nil? || df.is_a?(Polars::DataFrame)

        errors.add(:df, "Must be an instance of Polars::DataFrame")
      end

      # Forwards `options` unchanged to the superclass initializer, then
      # stamps the instance with the current time.
      #
      # @param options [Object] passed as-is to `super`
      #   (presumably a Hash of attributes -- confirm against GlueGun::DSL)
      def initialize(options)
        super
        @last_updated_at = Time.now
      end

      # Yields consecutive slices of `df`, each containing at most `of` rows.
      #
      # Without a block an Enumerator over the same slices is returned, so
      # callers can chain (`in_batches(of: 100).map { ... }`) instead of
      # hitting a LocalJumpError.
      #
      # @param of [Integer] maximum number of rows per slice
      # @yield [batch] the result of `df.slice(offset, length)`
      # @return [Range, Enumerator] the stepped row range when a block is
      #   given, otherwise an Enumerator
      def in_batches(of: 10_000)
        return enum_for(:in_batches, of: of) unless block_given?

        total_rows = df.shape[0]
        (0...total_rows).step(of) do |offset|
          # Clamp the last slice so it never requests rows past the end.
          yield df.slice(offset, [of, total_rows - offset].min)
        end
      end

      # @return [Array] always empty: this datasource is in-memory and has no
      #   backing files
      def files
        []
      end

      # Intentionally a no-op: an in-memory datasource has nothing to refresh.
      #
      # @return [nil]
      def refresh!
        # No need to refresh for in-memory datasource
      end

      # @return [Object] the wrapped `df`, unchanged
      def data
        df
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
easy_ml-0.1.4 lib/easy_ml/data/datasource/polars_datasource.rb
easy_ml-0.1.3 lib/easy_ml/data/datasource/polars_datasource.rb
easy_ml-0.1.2 lib/easy_ml/data/datasource/polars_datasource.rb
easy_ml-0.1.1 lib/easy_ml/data/datasource/polars_datasource.rb