lib/datasets/downloader.rb in red-datasets-0.1.1 vs lib/datasets/downloader.rb in red-datasets-0.1.2

- old
+ new

@@ -6,10 +6,12 @@ require "net/http" require "pathname" module Datasets class Downloader + class TooManyRedirects < StandardError; end + def initialize(url) if url.is_a?(URI::Generic) url = url.dup else url = URI.parse(url) @@ -29,44 +31,68 @@ if partial_output_path.exist? start = partial_output_path.size headers["Range"] = "bytes=#{start}-" end - Net::HTTP.start(@url.hostname, - @url.port, - :use_ssl => (@url.scheme == "https")) do |http| - path = @url.path - path += "?#{@url.query}" if @url.query + start_http(@url, headers) do |response| + if response.is_a?(Net::HTTPPartialContent) + mode = "ab" + else + start = nil + mode = "wb" + end + + base_name = @url.path.split("/").last + size_current = 0 + size_max = response.content_length + if start + size_current += start + size_max += start + end + progress_reporter = ProgressReporter.new(base_name, size_max) + partial_output_path.open(mode) do |output| + response.read_body do |chunk| + size_current += chunk.bytesize + progress_reporter.report(size_current) + output.write(chunk) + end + end + end + FileUtils.mv(partial_output_path, output_path) + rescue TooManyRedirects => error + last_url = error.message[/\Atoo many redirections: (.+)\z/, 1] + raise TooManyRedirects, "too many redirections: #{@url} .. #{last_url}" + end + + private def start_http(url, headers, limit = 10, &block) + if limit == 0 + raise TooManyRedirects, "too many redirections: #{url}" + end + http = Net::HTTP.new(url.hostname, url.port) + # http.set_debug_output($stderr) + http.use_ssl = (url.scheme == "https") + http.start do + path = url.path + path += "?#{url.query}" if url.query request = Net::HTTP::Get.new(path, headers) http.request(request) do |response| case response - when Net::HTTPPartialContent - mode = "ab" - when Net::HTTPSuccess - start = nil - mode = "wb" + when Net::HTTPSuccess, Net::HTTPPartialContent + return block.call(response) + when Net::HTTPRedirection + url = URI.parse(response[:location]) + $stderr.puts "Redirect to #{url}" + return start_http(url, headers, limit - 1, &block) else - break - end - - base_name = @url.path.split("/").last - size_current = 0 - size_max = response.content_length - if start - size_current += start - size_max += start - end - progress_reporter = ProgressReporter.new(base_name, size_max) - partial_output_path.open(mode) do |output| - response.read_body do |chunk| - size_current += chunk.bytesize - progress_reporter.report(size_current) - output.write(chunk) + message = response.code + if response.message and not response.message.empty? + message += ": #{response.message}" end + message += ": #{url}" + raise response.error_type.new(message, response) end end end - FileUtils.mv(partial_output_path, output_path) end class ProgressReporter def initialize(base_name, size_max) @base_name = base_name