lib/datasets/downloader.rb in red-datasets-0.0.2 vs lib/datasets/downloader.rb in red-datasets-0.0.3

- old
+ new

@@ -1,64 +1,286 @@ require "fileutils" +begin + require "io/console" +rescue LoadError +end require "open-uri" +require "pathname" module Datasets class Downloader def initialize(url) - url = URI.parse(url) unless url.is_a?(URI::Generic) + if url.is_a?(URI::Generic) + url = url.dup + else + url = URI.parse(url) + end @url = url + @url.extend(CurrentBufferReadable) end def download(output_path) output_path.parent.mkpath - if $stderr == STDERR and $stderr.tty? - max = nil + start = nil + partial_output_path = Pathname.new("#{output_path}.partial") + if partial_output_path.exist? + start = partial_output_path.size + end + + progress_reporter = nil + content_length_proc = lambda do |content_length| base_name = @url.path.split("/").last - content_length_proc = lambda do |content_length| - max = content_length - end - progress_proc = lambda do |current| - if max - percent = (current / max.to_f) * 100 - formatted_size = "[%s/%s]" % [format_size(current), format_size(max)] - $stderr.print("\r%s - %06.2f%% %s" % - [base_name, percent, formatted_size]) - $stderr.puts if current == max - end - end - options = { - :content_length_proc => content_length_proc, - :progress_proc => progress_proc, - } - else - options = {} + size_max = content_length + size_max += start if start + progress_reporter = ProgressReporter.new(base_name, size_max) end + progress_proc = lambda do |size_current| + size_current += start if start + progress_reporter.report(size_current) if progress_reporter + end + options = { + :content_length_proc => content_length_proc, + :progress_proc => progress_proc, + } + if start + options["Range"] = "bytes=#{start}-" + end begin @url.open(options) do |input| - output_path.open("wb") do |output| - IO.copy_stream(input, output) - end + copy_stream(input, partial_output_path) end - rescue - FileUtils.rm_f(output_path) + rescue Interrupt, Net::ReadTimeout + if @url.current_buffer + input = @url.current_buffer.io + input.rewind + copy_stream(input, partial_output_path) + end raise end + + FileUtils.mv(partial_output_path, output_path) end private - def format_size(size) - if size < 1024 - "%d" % size - elsif size < (1024 ** 2) - "%7.2fKiB" % (size.to_f / 1024) - elsif size < (1024 ** 3) - "%7.2fMiB" % (size.to_f / (1024 ** 2)) - elsif size < (1024 ** 4) - "%7.2fGiB" % (size.to_f / (1024 ** 3)) + def copy_stream(input, partial_output_path) + if partial_output_path.exist? + # TODO: It's better that we use "206 Partial Content" response + # to detect partial response. + partial_head = partial_output_path.open("rb") do |partial_output| + partial_output.read(256) + end + input_head = input.read(partial_head.bytesize) + input.rewind + if partial_head == input_head + mode = "wb" + else + mode = "ab" + end else - "%.2fTiB" % (size.to_f / (1024 ** 4)) + mode = "wb" + end + partial_output_path.open(mode) do |partial_output| + IO.copy_stream(input, partial_output) + end + end + + module CurrentBufferReadable + attr_reader :current_buffer + def buffer_open(buffer, proxy, options) + @current_buffer = buffer + super + end + end + + class ProgressReporter + def initialize(base_name, size_max) + @base_name = base_name + @size_max = size_max + + @time_previous = Time.now + @size_previous = 0 + + @need_report = ($stderr == STDERR and $stderr.tty?) + end + + def report(size_current) + return unless @need_report + return if @size_max.nil? + return unless foreground? + + done = (size_current == @size_max) + time_current = Time.now + if not done and time_current - @time_previous <= 1 + return + end + + read_bytes = size_current - @size_previous + throughput = read_bytes.to_f / (time_current - @time_previous) + @time_previous = time_current + @size_previous = size_current + + message = build_message(size_current, throughput) + $stderr.print("\r#{message}") if message + $stderr.puts if done + end + + private + def build_message(size_current, throughput) + percent = (size_current / @size_max.to_f) * 100 + formatted_size = "[%s/%s]" % [ + format_size(size_current), + format_size(@size_max), + ] + rest_second = (@size_max - size_current) / throughput + separator = " - " + progress = "%05.1f%% %s %s %s" % [ + percent, + formatted_size, + format_time_interval(rest_second), + format_throughput(throughput), + ] + base_name = @base_name + + width = guess_terminal_width + return "#{base_name}#{separator}#{progress}" if width.nil? + + return nil if progress.size > width + + base_name_width = width - progress.size - separator.size + if base_name.size > base_name_width + ellipsis = "..." + shorten_base_name_width = base_name_width - ellipsis.size + if shorten_base_name_width < 1 + return progress + else + base_name = base_name[0, shorten_base_name_width] + ellipsis + end + end + "#{base_name}#{separator}#{progress}" + end + + def format_size(size) + if size < 1000 + "%d" % size + elsif size < (1000 ** 2) + "%6.2fKB" % (size.to_f / 1000) + elsif size < (1000 ** 3) + "%6.2fMB" % (size.to_f / (1000 ** 2)) + elsif size < (1000 ** 4) + "%6.2fGB" % (size.to_f / (1000 ** 3)) + else + "%.2fTB" % (size.to_f / (1000 ** 4)) + end + end + + def format_time_interval(interval) + if interval < 60 + "00:00:%02d" % interval + elsif interval < (60 * 60) + minute, second = interval.divmod(60) + "00:%02d:%02d" % [minute, second] + elsif interval < (60 * 60 * 24) + minute, second = interval.divmod(60) + hour, minute = minute.divmod(60) + "%02d:%02d:%02d" % [hour, minute, second] + else + minute, second = interval.divmod(60) + hour, minute = minute.divmod(60) + day, hour = hour.divmod(24) + "%dd %02d:%02d:%02d" % [day, hour, minute, second] + end + end + + def format_throughput(throughput) + throughput_byte = throughput / 8 + if throughput_byte <= 1000 + "%3dB/s" % throughput_byte + elsif throughput_byte <= (1000 ** 2) + "%3dKB/s" % (throughput_byte / 1000) + elsif throughput_byte <= (1000 ** 3) + "%3dMB/s" % (throughput_byte / (1000 ** 2)) + else + "%3dGB/s" % (throughput_byte / (1000 ** 3)) + end + end + + def foreground? + proc_stat_path = "/proc/self/stat" + ps_path = "/bin/ps" + + if File.exist?(proc_stat_path) + stat = File.read(proc_stat_path).sub(/\A.+\) /, "").split + process_group_id = stat[2] + terminal_process_group_id = stat[5] + process_group_id == terminal_process_group_id + elsif File.executable?(ps_path) + IO.pipe do |input, output| + pid = spawn(ps_path, "-o", "stat", "-p", Process.pid.to_s, + {:out => output, :err => output}) + output.close + _, status = Process.waitpid2(pid) + return false unless status.success? + + input.each_line.to_a.last.include?("+") + end + else + false + end + end + + def guess_terminal_width + guess_terminal_width_from_io || + guess_terminal_width_from_command || + guess_terminal_width_from_env || + 80 + end + + def guess_terminal_width_from_io + if IO.respond_to?(:console) + IO.console.winsize[1] + elsif $stderr.respond_to?(:winsize) + begin + $stderr.winsize[1] + rescue SystemCallError + nil + end + else + nil + end + end + + def guess_terminal_width_from_command + IO.pipe do |input, output| + begin + pid = spawn("tput", "cols", {:out => output, :err => output}) + rescue SystemCallError + return nil + end + + output.close + _, status = Process.waitpid2(pid) + return nil unless status.success? + + result = input.read.chomp + begin + Integer(result, 10) + rescue ArgumentError + nil + end + end + end + + def guess_terminal_width_from_env + env = ENV["COLUMNS"] || ENV["TERM_WIDTH"] + return nil if env.nil? + + begin + Integer(env, 10) + rescue ArgumentError + nil + end end end end end