Sha256: d17d8d9427b73d208610714eb563593e12bf3f8cebdafb1a43c3324d89ba7031
Contents?: true
Size: 1.86 KB
Versions: 3
Compression:
Stored size: 1.86 KB
Contents
require "csv"

require_relative "dataset"

module Datasets
  # The "Adult" dataset hosted by the UCI Machine Learning Repository.
  #
  # The raw data file is downloaded into the cache directory the first time
  # it is needed and every data row is exposed as a Record through #each.
  #
  # NOTE(review): `cache_dir_path`, `download` and `@metadata` are not defined
  # in this file; they are presumably provided by Dataset -- confirm there.
  class Adult < Dataset
    # One data row. Members are listed in the column order of the data file,
    # because #each builds records positionally with `Record.new(*row)`.
    Record = Struct.new(
      :age,
      :work_class,
      :final_weight,
      :education,
      :n_education_years,
      :marital_status,
      :occupation,
      :relationship,
      :race,
      :sex,
      :capital_gain,
      :capital_loss,
      :hours_per_week,
      :native_country,
      :label
    )

    # @param type [Symbol] which data file to read: +:train+ or +:test+.
    # @raise [ArgumentError] if +type+ is neither +:train+ nor +:test+.
    def initialize(type: :train)
      unless [:train, :test].include?(type)
        raise ArgumentError, 'Please set type :train or :test'
      end

      super()
      @type = type
      @metadata.id = "adult-#{@type}"
      @metadata.name = "Adult: #{@type}"
      @metadata.url = "http://archive.ics.uci.edu/ml/datasets/adult"
      # Stored as a lambda so that adult.names is only read (and, if missing,
      # downloaded) when the lambda is actually called.
      @metadata.description = lambda do
        read_names
      end
    end

    # Yields one Record per data row.
    #
    # @yieldparam record [Record]
    # @return [Enumerator] when no block is given.
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |row|
          # Rows without a first field (such as blank lines) carry no data.
          next if row[0].nil?

          record = Record.new(*row)
          yield(record)
        end
      end
    end

    private

    # Opens the cached data file for the selected type as a CSV reader,
    # downloading it first when it is not in the cache yet.
    #
    # @yieldparam csv [CSV] reader positioned at the start of the file.
    def open_data
      ext = case @type
            when :train then "data"
            when :test then "test"
            end
      data_path = cache_dir_path + "adult-#{ext}.csv"
      unless data_path.exist?
        data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
        download(data_path, data_url)
      end

      options = {
        # Numeric-looking fields become numbers; the second converter strips
        # surrounding whitespace from the fields it receives.
        converters: [:numeric, lambda {|f| f.strip}],
        # Lines starting with "|" are not data rows.
        skip_lines: /\A\|/,
      }
      # The options must be passed as keywords: since Ruby 3.0 a positional
      # Hash is no longer converted to keyword arguments, so it would be
      # taken as CSV.open's second positional parameter (the file mode).
      CSV.open(data_path, **options) do |csv|
        yield(csv)
      end
    end

    # Returns the text of the adult.names file, downloading it into the
    # cache first when it is not there yet.
    #
    # @return [String]
    def read_names
      names_path = cache_dir_path + "adult.names"
      unless names_path.exist?
        names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names"
        download(names_path, names_url)
      end
      names_path.read
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
red-datasets-0.0.9 | lib/datasets/adult.rb |
red-datasets-0.0.8 | lib/datasets/adult.rb |
red-datasets-0.0.7 | lib/datasets/adult.rb |