lib/datasets/downloader.rb in red-datasets-0.0.2 vs lib/datasets/downloader.rb in red-datasets-0.0.3
- old
+ new
@@ -1,64 +1,286 @@
require "fileutils"
+begin
+ require "io/console"
+rescue LoadError
+end
require "open-uri"
+require "pathname"
module Datasets
class Downloader
def initialize(url)
- url = URI.parse(url) unless url.is_a?(URI::Generic)
+ if url.is_a?(URI::Generic)
+ url = url.dup
+ else
+ url = URI.parse(url)
+ end
@url = url
+ @url.extend(CurrentBufferReadable)
end
def download(output_path)
output_path.parent.mkpath
- if $stderr == STDERR and $stderr.tty?
- max = nil
+ start = nil
+ partial_output_path = Pathname.new("#{output_path}.partial")
+ if partial_output_path.exist?
+ start = partial_output_path.size
+ end
+
+ progress_reporter = nil
+ content_length_proc = lambda do |content_length|
base_name = @url.path.split("/").last
- content_length_proc = lambda do |content_length|
- max = content_length
- end
- progress_proc = lambda do |current|
- if max
- percent = (current / max.to_f) * 100
- formatted_size = "[%s/%s]" % [format_size(current), format_size(max)]
- $stderr.print("\r%s - %06.2f%% %s" %
- [base_name, percent, formatted_size])
- $stderr.puts if current == max
- end
- end
- options = {
- :content_length_proc => content_length_proc,
- :progress_proc => progress_proc,
- }
- else
- options = {}
+ size_max = content_length
+ size_max += start if start
+ progress_reporter = ProgressReporter.new(base_name, size_max)
end
+ progress_proc = lambda do |size_current|
+ size_current += start if start
+ progress_reporter.report(size_current) if progress_reporter
+ end
+ options = {
+ :content_length_proc => content_length_proc,
+ :progress_proc => progress_proc,
+ }
+ if start
+ options["Range"] = "bytes=#{start}-"
+ end
begin
@url.open(options) do |input|
- output_path.open("wb") do |output|
- IO.copy_stream(input, output)
- end
+ copy_stream(input, partial_output_path)
end
- rescue
- FileUtils.rm_f(output_path)
+ rescue Interrupt, Net::ReadTimeout
+ if @url.current_buffer
+ input = @url.current_buffer.io
+ input.rewind
+ copy_stream(input, partial_output_path)
+ end
raise
end
+
+ FileUtils.mv(partial_output_path, output_path)
end
private
- def format_size(size)
- if size < 1024
- "%d" % size
- elsif size < (1024 ** 2)
- "%7.2fKiB" % (size.to_f / 1024)
- elsif size < (1024 ** 3)
- "%7.2fMiB" % (size.to_f / (1024 ** 2))
- elsif size < (1024 ** 4)
- "%7.2fGiB" % (size.to_f / (1024 ** 3))
+ def copy_stream(input, partial_output_path)
+ if partial_output_path.exist?
+ # TODO: It's better that we use "206 Partial Content" response
+ # to detect partial response.
+ partial_head = partial_output_path.open("rb") do |partial_output|
+ partial_output.read(256)
+ end
+ input_head = input.read(partial_head.bytesize)
+ input.rewind
+ if partial_head == input_head
+ mode = "wb"
+ else
+ mode = "ab"
+ end
else
- "%.2fTiB" % (size.to_f / (1024 ** 4))
+ mode = "wb"
+ end
+ partial_output_path.open(mode) do |partial_output|
+ IO.copy_stream(input, partial_output)
+ end
+ end
+
+ module CurrentBufferReadable
+ attr_reader :current_buffer
+ def buffer_open(buffer, proxy, options)
+ @current_buffer = buffer
+ super
+ end
+ end
+
+ class ProgressReporter
+ def initialize(base_name, size_max)
+ @base_name = base_name
+ @size_max = size_max
+
+ @time_previous = Time.now
+ @size_previous = 0
+
+ @need_report = ($stderr == STDERR and $stderr.tty?)
+ end
+
+ def report(size_current)
+ return unless @need_report
+ return if @size_max.nil?
+ return unless foreground?
+
+ done = (size_current == @size_max)
+ time_current = Time.now
+ if not done and time_current - @time_previous <= 1
+ return
+ end
+
+ read_bytes = size_current - @size_previous
+ throughput = read_bytes.to_f / (time_current - @time_previous)
+ @time_previous = time_current
+ @size_previous = size_current
+
+ message = build_message(size_current, throughput)
+ $stderr.print("\r#{message}") if message
+ $stderr.puts if done
+ end
+
+ private
+ def build_message(size_current, throughput)
+ percent = (size_current / @size_max.to_f) * 100
+ formatted_size = "[%s/%s]" % [
+ format_size(size_current),
+ format_size(@size_max),
+ ]
+ rest_second = (@size_max - size_current) / throughput
+ separator = " - "
+ progress = "%05.1f%% %s %s %s" % [
+ percent,
+ formatted_size,
+ format_time_interval(rest_second),
+ format_throughput(throughput),
+ ]
+ base_name = @base_name
+
+ width = guess_terminal_width
+ return "#{base_name}#{separator}#{progress}" if width.nil?
+
+ return nil if progress.size > width
+
+ base_name_width = width - progress.size - separator.size
+ if base_name.size > base_name_width
+ ellipsis = "..."
+ shorten_base_name_width = base_name_width - ellipsis.size
+ if shorten_base_name_width < 1
+ return progress
+ else
+ base_name = base_name[0, shorten_base_name_width] + ellipsis
+ end
+ end
+ "#{base_name}#{separator}#{progress}"
+ end
+
+ def format_size(size)
+ if size < 1000
+ "%d" % size
+ elsif size < (1000 ** 2)
+ "%6.2fKB" % (size.to_f / 1000)
+ elsif size < (1000 ** 3)
+ "%6.2fMB" % (size.to_f / (1000 ** 2))
+ elsif size < (1000 ** 4)
+ "%6.2fGB" % (size.to_f / (1000 ** 3))
+ else
+ "%.2fTB" % (size.to_f / (1000 ** 4))
+ end
+ end
+
+ def format_time_interval(interval)
+ if interval < 60
+ "00:00:%02d" % interval
+ elsif interval < (60 * 60)
+ minute, second = interval.divmod(60)
+ "00:%02d:%02d" % [minute, second]
+ elsif interval < (60 * 60 * 24)
+ minute, second = interval.divmod(60)
+ hour, minute = minute.divmod(60)
+ "%02d:%02d:%02d" % [hour, minute, second]
+ else
+ minute, second = interval.divmod(60)
+ hour, minute = minute.divmod(60)
+ day, hour = hour.divmod(24)
+ "%dd %02d:%02d:%02d" % [day, hour, minute, second]
+ end
+ end
+
+ def format_throughput(throughput)
+ throughput_byte = throughput / 8
+ if throughput_byte <= 1000
+ "%3dB/s" % throughput_byte
+ elsif throughput_byte <= (1000 ** 2)
+ "%3dKB/s" % (throughput_byte / 1000)
+ elsif throughput_byte <= (1000 ** 3)
+ "%3dMB/s" % (throughput_byte / (1000 ** 2))
+ else
+ "%3dGB/s" % (throughput_byte / (1000 ** 3))
+ end
+ end
+
+ def foreground?
+ proc_stat_path = "/proc/self/stat"
+ ps_path = "/bin/ps"
+
+ if File.exist?(proc_stat_path)
+ stat = File.read(proc_stat_path).sub(/\A.+\) /, "").split
+ process_group_id = stat[2]
+ terminal_process_group_id = stat[5]
+ process_group_id == terminal_process_group_id
+ elsif File.executable?(ps_path)
+ IO.pipe do |input, output|
+ pid = spawn(ps_path, "-o", "stat", "-p", Process.pid.to_s,
+ {:out => output, :err => output})
+ output.close
+ _, status = Process.waitpid2(pid)
+ return false unless status.success?
+
+ input.each_line.to_a.last.include?("+")
+ end
+ else
+ false
+ end
+ end
+
+ def guess_terminal_width
+ guess_terminal_width_from_io ||
+ guess_terminal_width_from_command ||
+ guess_terminal_width_from_env ||
+ 80
+ end
+
+ def guess_terminal_width_from_io
+ if IO.respond_to?(:console)
+ IO.console.winsize[1]
+ elsif $stderr.respond_to?(:winsize)
+ begin
+ $stderr.winsize[1]
+ rescue SystemCallError
+ nil
+ end
+ else
+ nil
+ end
+ end
+
+ def guess_terminal_width_from_command
+ IO.pipe do |input, output|
+ begin
+ pid = spawn("tput", "cols", {:out => output, :err => output})
+ rescue SystemCallError
+ return nil
+ end
+
+ output.close
+ _, status = Process.waitpid2(pid)
+ return nil unless status.success?
+
+ result = input.read.chomp
+ begin
+ Integer(result, 10)
+ rescue ArgumentError
+ nil
+ end
+ end
+ end
+
+ def guess_terminal_width_from_env
+ env = ENV["COLUMNS"] || ENV["TERM_WIDTH"]
+ return nil if env.nil?
+
+ begin
+ Integer(env, 10)
+ rescue ArgumentError
+ nil
+ end
end
end
end
end