lib/datasets/downloader.rb in red-datasets-0.1.1 vs lib/datasets/downloader.rb in red-datasets-0.1.2
- old
+ new
@@ -6,10 +6,12 @@
require "net/http"
require "pathname"
module Datasets
class Downloader
+ class TooManyRedirects < StandardError; end # Raised by start_http once its redirect limit reaches 0.
+
def initialize(url)
if url.is_a?(URI::Generic)
url = url.dup
else
url = URI.parse(url)
@@ -29,44 +31,68 @@
if partial_output_path.exist?
start = partial_output_path.size
headers["Range"] = "bytes=#{start}-"
end
- Net::HTTP.start(@url.hostname,
- @url.port,
- :use_ssl => (@url.scheme == "https")) do |http|
- path = @url.path
- path += "?#{@url.query}" if @url.query
+ start_http(@url, headers) do |response|
+ if response.is_a?(Net::HTTPPartialContent)
+ mode = "ab"
+ else
+ start = nil
+ mode = "wb"
+ end
+
+ base_name = @url.path.split("/").last
+ size_current = 0
+ size_max = response.content_length
+ if start
+ size_current += start
+ size_max += start
+ end
+ progress_reporter = ProgressReporter.new(base_name, size_max)
+ partial_output_path.open(mode) do |output|
+ response.read_body do |chunk|
+ size_current += chunk.bytesize
+ progress_reporter.report(size_current)
+ output.write(chunk)
+ end
+ end
+ end
+ FileUtils.mv(partial_output_path, output_path)
+ rescue TooManyRedirects => error
+ last_url = error.message[/\Atoo many redirections: (.+)\z/, 1]
+ raise TooManyRedirects, "too many redirections: #{@url} .. #{last_url}"
+ end
+
+ private def start_http(url, headers, limit = 10, &block)
+ if limit == 0
+ raise TooManyRedirects, "too many redirections: #{url}"
+ end
+ http = Net::HTTP.new(url.hostname, url.port)
+ # http.set_debug_output($stderr)
+ http.use_ssl = (url.scheme == "https")
+ http.start do
+ path = url.path
+ path += "?#{url.query}" if url.query
request = Net::HTTP::Get.new(path, headers)
http.request(request) do |response|
case response
- when Net::HTTPPartialContent
- mode = "ab"
- when Net::HTTPSuccess
- start = nil
- mode = "wb"
+ when Net::HTTPSuccess, Net::HTTPPartialContent
+ return block.call(response)
+ when Net::HTTPRedirection
+ url = URI.parse(response[:location])
+ $stderr.puts "Redirect to #{url}"
+ return start_http(url, headers, limit - 1, &block)
else
- break
- end
-
- base_name = @url.path.split("/").last
- size_current = 0
- size_max = response.content_length
- if start
- size_current += start
- size_max += start
- end
- progress_reporter = ProgressReporter.new(base_name, size_max)
- partial_output_path.open(mode) do |output|
- response.read_body do |chunk|
- size_current += chunk.bytesize
- progress_reporter.report(size_current)
- output.write(chunk)
+ message = response.code
+ if response.message and not response.message.empty?
+ message += ": #{response.message}"
end
+ message += ": #{url}"
+ raise response.error_type.new(message, response)
end
end
end
- FileUtils.mv(partial_output_path, output_path)
end
class ProgressReporter
def initialize(base_name, size_max)
@base_name = base_name