Sha256: dcd934bbfd74be650a9f1c2fff1aabc561a2c04afbc7bb750f466e98992435bf
Contents?: true
Size: 1.82 KB
Versions: 3
Compression:
Stored size: 1.82 KB
Contents
require "csv"

require_relative "dataset"

module Datasets
  # The "Adult" dataset served from archive.ics.uci.edu.
  #
  # Rows are exposed as {Record} structs through {#each}. The data file and
  # the accompanying names file are fetched with the +download+ helper and
  # stored under +cache_dir_path+ (both are expected to be provided by
  # Dataset -- they are not defined in this file).
  class Adult < Dataset
    # One row of the dataset. Members are filled positionally from the CSV
    # columns, so their order here must match the column order of the file.
    Record = Struct.new(
      :age,
      :work_class,
      :final_weight,
      :education,
      :n_education_years,
      :marital_status,
      :occupation,
      :relationship,
      :race,
      :sex,
      :capital_gain,
      :capital_loss,
      :hours_per_week,
      :native_country,
      :label
    )

    # @param type [Symbol] which split to read: +:train+ or +:test+.
    # @raise [ArgumentError] if +type+ is neither +:train+ nor +:test+.
    def initialize(type: :train)
      unless [:train, :test].include?(type)
        raise ArgumentError, 'Please set type :train or :test'
      end

      super()
      @type = type
      @metadata.id = "adult-#{@type}"
      @metadata.name = "Adult: #{@type}"
      @metadata.url = "https://archive.ics.uci.edu/ml/datasets/adult"
      @metadata.licenses = ["CC-BY-4.0"]
      # The description is a lambda so the names file is only fetched and
      # read when the lambda is actually called.
      @metadata.description = lambda do
        read_names
      end
    end

    # Iterates over the rows of the selected split.
    #
    # @yieldparam record [Record] one parsed row.
    # @return [Enumerator] when called without a block.
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |row|
          # Rows without a first field carry no record (presumably blank
          # lines in the source file); skip them.
          next if row[0].nil?

          yield(Record.new(*row))
        end
      end
    end

    private

    # Downloads the data file for the selected split (via +download+) and
    # yields an open CSV reader for it.
    #
    # @yieldparam csv [CSV] reader positioned at the start of the file.
    def open_data
      ext =
        case @type
        when :train then "data"
        when :test then "test"
        end
      data_path = cache_dir_path + "adult-#{ext}.csv"
      # Fetched over HTTPS, matching the URL scheme used for the names file.
      data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
      download(data_path, data_url)

      options = {
        # Numeric-looking fields become numbers; the second converter strips
        # surrounding whitespace from the field it receives.
        converters: [:numeric, lambda {|field| field.strip}],
        # Ignore lines that begin with "|".
        skip_lines: /\A\|/,
      }
      CSV.open(data_path, **options) do |csv|
        yield(csv)
      end
    end

    # Downloads the names file (via +download+) and returns the result of
    # calling +read+ on its cached path.
    def read_names
      names_path = cache_dir_path + "adult.names"
      names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names"
      download(names_path, names_url)
      names_path.read
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygem
Version | Path |
---|---|
red-datasets-0.1.7 | lib/datasets/adult.rb |
red-datasets-0.1.6 | lib/datasets/adult.rb |
red-datasets-0.1.5 | lib/datasets/adult.rb |