Sha256: 842e5ca591aa5f1b4fc929e87a080237640211f9e6bc49831e6067856e54890f
Contents?: true
Size: 1.71 KB
Versions: 3
Compression:
Stored size: 1.71 KB
Contents
require "csv"
require_relative 'zip-extractor'

module Datasets
  # Dataset wrapper for the "California Housing" data published at
  # http://lib.stat.cmu.edu/datasets/ as +houses.zip+.
  #
  # Iterating with #each calls the inherited +download+ helper for the
  # archive, reads it through ZipExtractor and yields one Record per data row.
  class CaliforniaHousing < Dataset
    # One parsed data row. Members are filled positionally from the parsed
    # columns (see #each), so this order is assumed to match the column order
    # of the data file.
    Record = Struct.new(:median_house_value,
                        :median_income,
                        :housing_median_age,
                        :total_rooms,
                        :total_bedrooms,
                        :population,
                        :households,
                        :latitude,
                        :longitude)

    # Fills in the static metadata (id, name, url, licenses, description).
    def initialize
      super()
      @metadata.id = "california-housing"
      @metadata.name = "California Housing"
      @metadata.url = "http://lib.stat.cmu.edu/datasets/"
      # NOTE(review): "CCO" (letter O) may be a typo for "CC0" (digit zero).
      # Left untouched because callers may match on this exact value - confirm.
      @metadata.licenses = ["CCO"]
      @metadata.description = <<~DESCRIPTION
        Housing information from the 1990 census used in
        Pace, R. Kelley and Ronald Barry,
        "Sparse Spatial Autoregressions",
        Statistics and Probability Letters, 33 (1997) 291-297.
        Available from http://lib.stat.cmu.edu/datasets/.
      DESCRIPTION
    end

    # Yields every row of the data file as a Record.
    #
    # @yieldparam record [Record] one parsed row
    # @return [Enumerator] when called without a block
    def each
      return to_enum(__method__) unless block_given?

      data_path = cache_dir_path + "houses.zip"
      data_url = "http://lib.stat.cmu.edu/datasets/houses.zip"
      file_name = "cadata.txt"
      download(data_path, data_url)
      open_data(data_path, file_name) do |input|
        # `+""` guarantees a mutable buffer. A bare "" literal raises
        # FrozenError on `<<` when frozen string literals are enabled and
        # warns on Ruby 3.4+.
        data = +""
        input.each_line do |line|
          # Only lines that begin with a space are treated as data rows;
          # anything else (presumably the free-text header) is skipped.
          next unless line.start_with?(" ")

          # Turn the space-separated columns into CSV: drop the leading
          # padding, then collapse each run of spaces into a single comma.
          data << line.lstrip.gsub(/ +/, ",")
        end
        CSV.parse(data, converters: [:numeric]) do |row|
          yield(Record.new(*row))
        end
      end
    end

    private

    # Opens the downloaded archive and yields an input stream for its content.
    #
    # NOTE(review): +file_name+ is accepted but never used; whatever
    # ZipExtractor#extract_first_file yields is passed through unchanged.
    def open_data(data_path, file_name)
      ZipExtractor.new(data_path).extract_first_file do |input|
        yield input
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
red-datasets-0.1.7 | lib/datasets/california-housing.rb |
red-datasets-0.1.6 | lib/datasets/california-housing.rb |
red-datasets-0.1.5 | lib/datasets/california-housing.rb |