lib/disco/data.rb in disco-0.2.8 vs lib/disco/data.rb in disco-0.2.9
- old
+ new
@@ -1,11 +1,13 @@
module Disco
module Data
def load_movielens
- item_path = download_file("ml-100k/u.item", "http://files.grouplens.org/datasets/movielens/ml-100k/u.item",
+ require "csv"
+
+ item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
- data_path = download_file("ml-100k/u.data", "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
+ data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")
# convert u.item to utf-8
movies_str = File.read(item_path).encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
@@ -27,9 +29,14 @@
end
private
def download_file(fname, origin, file_hash:)
+ require "digest"
+ require "fileutils"
+ require "net/http"
+ require "tmpdir"
+
# TODO handle this better
raise "No HOME" unless ENV["HOME"]
dest = "#{ENV["HOME"]}/.disco/#{fname}"
FileUtils.mkdir_p(File.dirname(dest))