Sha256: c23712e2183208609eeeb17f440d54f1a77825fdca4d28a47c691c99c63a8c66

Contents?: true

Size: 1.26 KB

Versions: 22

Compression:

Stored size: 1.26 KB

Contents

require_relative 'all_jobs_shared'

class AggregateJob
  include AllJobsShared
  using Remi::Refinements::Daru

  define_source :source_data, Remi::DataSource::DataFrame
  define_target :target_data, Remi::DataTarget::DataFrame
  define_target :multigroup_target_data, Remi::DataTarget::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do
    mymin = lambda do |field, df, group_key, indicies|
      values = indicies.map { |idx| df.row[idx][field] }
      "Group #{group_key} has a minimum value of #{values.min}"
    end

    # Daru groups don't use the index of the dataframe when returning groups (WTF?).
    # Instead they return the position of the record in the dataframe.  Here, we
    # shift the indexes which causes a failure if this artifact is not handled
    # properly in the aggregate function
    source_data.df.index = Daru::Index.new(1.upto(source_data.df.size).to_a)

    target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
    target_data.df.vectors = Daru::Index.new([:alpha, :year])

    multigroup_target_data.df = source_data.df.aggregate(by: [:alpha,:beta], func: mymin.curry.(:year)).detach_index
    multigroup_target_data.df.vectors = Daru::Index.new([:alpha_beta, :year])



  end
end

Version data entries

22 entries across 22 versions & 1 rubygems

Version Path
remi-0.2.7 lib/remi/project/jobs/aggregate_job.rb
remi-0.2.6 lib/remi/project/jobs/aggregate_job.rb