Sha256: 5815bf9552df3ce3d971356585c74067ff0bd5122a57933885b0ad1a46ee8225

Contents?: true

Size: 1.35 KB

Versions: 2

Compression:

Stored size: 1.35 KB

Contents

module Datasets
  # Dataset backed by a single CSV file from the mwaskom/seaborn-data GitHub
  # repository. Rows are yielded as Hashes keyed by Symbol column names.
  class SeabornData < Dataset
    # Template of the raw CSV URL; %{name} is replaced by the dataset name.
    URL_FORMAT = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/%{name}.csv".freeze

    # @param name [String, Symbol] the seaborn-data dataset name, i.e. the
    #   CSV file name without its ".csv" extension (for example "penguins").
    def initialize(name)
      super()
      @metadata.id = "seaborn-data-#{name}"
      @metadata.name = "SeabornData: #{name}"
      @metadata.url = URL_FORMAT % {name: name}

      # Interpolate instead of concatenating so that a Symbol name works
      # here just as it already does for the metadata fields above.
      @data_path = cache_dir_path + "#{name}.csv"
      @name = name
    end

    # Yields every CSV row as a preprocessed record. The CSV file is
    # downloaded to the cache path first when it does not exist there yet.
    #
    # @yieldparam record [Hash] one row, keyed by Symbol column names, with
    #   values converted by CSV's :all converters.
    # @return [Enumerator] when called without a block.
    def each(&block)
      return to_enum(__method__) unless block_given?

      download(@data_path, @metadata.url) unless @data_path.exist?
      CSV.open(@data_path, headers: :first_row, converters: :all) do |csv|
        csv.each do |row|
          yield prepare_record(row)
        end
      end
    end

    private

    # Converts a CSV row into a Hash with Symbol keys and applies the
    # dataset-specific preprocessing hook, if one is defined for @name.
    #
    # @param csv_row [#to_h] a parsed CSV row.
    # @return [Hash] the prepared record.
    def prepare_record(csv_row)
      record = csv_row.to_h
      record.transform_keys!(&:to_sym)

      # Perform the same preprocessing as seaborn's load_dataset function.
      # Hooks are private methods named preprocess_<name>_record, hence the
      # include-private flag passed to respond_to?.
      preprocessor = :"preprocess_#{@name}_record"
      __send__(preprocessor, record) if respond_to?(preprocessor, true)

      record
    end

    # The same preprocessing as seaborn.load_dataset:
    # shorten the month to its first three characters when present.
    def preprocess_flights_record(record)
      record[:month] &&= record[:month][0, 3]
    end

    # The same preprocessing as seaborn.load_dataset:
    # capitalize the sex value when present.
    def preprocess_penguins_record(record)
      record[:sex] &&= record[:sex].capitalize
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
red-datasets-0.1.4 lib/datasets/seaborn-data.rb
red-datasets-0.1.3 lib/datasets/seaborn-data.rb