lib/datasets/downloader.rb in red-datasets-0.0.6 vs lib/datasets/downloader.rb in red-datasets-0.0.7

- old
+ new

@@ -1,11 +1,11 @@ require "fileutils" begin require "io/console" rescue LoadError end -require "open-uri" +require "net/http" require "pathname" module Datasets class Downloader def initialize(url) @@ -13,85 +13,58 @@ url = url.dup else url = URI.parse(url) end @url = url - @url.extend(CurrentBufferReadable) + unless @url.is_a?(URI::HTTP) + raise ArgumentError, "download URL must be HTTP or HTTPS: <#{@url}>" + end end def download(output_path) output_path.parent.mkpath + headers = {"User-Agent" => "Red Datasets/#{VERSION}"} start = nil partial_output_path = Pathname.new("#{output_path}.partial") if partial_output_path.exist? start = partial_output_path.size + headers["Range"] = "bytes=#{start}-" end - progress_reporter = nil - content_length_proc = lambda do |content_length| - base_name = @url.path.split("/").last - size_max = content_length - size_max += start if start - progress_reporter = ProgressReporter.new(base_name, size_max) - end - progress_proc = lambda do |size_current| - size_current += start if start - progress_reporter.report(size_current) if progress_reporter - end - options = { - :content_length_proc => content_length_proc, - :progress_proc => progress_proc, - } - if start - options["Range"] = "bytes=#{start}-" - end + Net::HTTP.start(@url.hostname, + @url.port, + :use_ssl => (@url.scheme == "https")) do |http| + request = Net::HTTP::Get.new(@url.path, headers) + http.request(request) do |response| + case response + when Net::HTTPPartialContent + mode = "ab" + when Net::HTTPSuccess + start = nil + mode = "wb" + else + break + end - begin - @url.open(options) do |input| - copy_stream(input, partial_output_path) + base_name = @url.path.split("/").last + size_current = 0 + size_max = response.content_length + if start + size_current += start + size_max += start + end + progress_reporter = ProgressReporter.new(base_name, size_max) + partial_output_path.open(mode) do |output| + response.read_body do |chunk| + size_current += chunk.bytesize + progress_reporter.report(size_current) + output.write(chunk) + end + end end - rescue Interrupt, Net::ReadTimeout - if @url.current_buffer - input = @url.current_buffer.io - input.rewind - copy_stream(input, partial_output_path) - end - raise end - FileUtils.mv(partial_output_path, output_path) - end - - private - def copy_stream(input, partial_output_path) - if partial_output_path.exist? - # TODO: It's better that we use "206 Partial Content" response - # to detect partial response. - partial_head = partial_output_path.open("rb") do |partial_output| - partial_output.read(256) - end - input_head = input.read(partial_head.bytesize) - input.rewind - if partial_head == input_head - mode = "wb" - else - mode = "ab" - end - else - mode = "wb" - end - partial_output_path.open(mode) do |partial_output| - IO.copy_stream(input, partial_output) - end - end - - module CurrentBufferReadable - attr_reader :current_buffer - def buffer_open(buffer, proxy, options) - @current_buffer = buffer - super - end end class ProgressReporter def initialize(base_name, size_max) @base_name = base_name