require "open-uri"
require "forwardable"
require "cgi"

module Refile
  # Downloads a given URL and exposes the resulting IO together with its size,
  # content type and original file name.
  #
  # Usage:
  #
  #   download = Refile::Download.new('http://example.com/my/data.bin')
  #   download.io
  #   #=> the IO-like object returned by OpenURI
  #   download.size
  #   #=> 389620
  #   download.content_type
  #   #=> "application/octet-stream"
  #   download.original_filename
  #   #=> "data.bin"
  class Download
    # Options handed to OpenURI for every request. Automatic redirect
    # following is switched off so that each hop can be validated by
    # {#ensure_uri} before it is requested.
    OPTIONS = {
      "User-Agent" => "Refile/#{Refile::VERSION}",
      open_timeout: 30,
      read_timeout: 30,
      redirect: false
    }.freeze

    # Maximum number of redirects followed before giving up.
    MAX_REDIRECTS = 10

    extend Forwardable
    def_delegators :@io, :size, :content_type

    attr_reader :io, :original_filename

    # @param uri [String, URI] the http(s) URL to download
    # @raise [Refile::InvalidUrl] if the URL (or a redirect target) is not a
    #   valid http/https URI
    # @raise [Refile::TooManyRedirects] if more than MAX_REDIRECTS redirects
    #   are encountered
    def initialize(uri)
      @io = download(uri)
      @original_filename = extract_original_filename
    end

    private

    # Fetches the URL, following redirects manually.
    #
    # @param uri [String, URI]
    # @return [IO] the IO-like object returned by OpenURI
    def download(uri)
      uri = ensure_uri(uri)
      follows_remaining = MAX_REDIRECTS

      # An explicit begin block is required here: `retry` must re-run only the
      # request, not the setup of `uri` and `follows_remaining` above.
      begin
        uri.open(OPTIONS)
      rescue OpenURI::HTTPRedirect => exception
        raise Refile::TooManyRedirects if follows_remaining.zero?

        uri = ensure_uri(exception.uri)
        follows_remaining -= 1
        retry
      rescue OpenURI::HTTPError => exception
        # OpenURI reports an unparsable Location header as an HTTPError whose
        # message carries this marker; surface it as an invalid URL instead.
        if exception.message.include?("(Invalid Location URI)")
          location = exception.io.meta["location"]
          raise Refile::InvalidUrl, "Invalid Redirect URI: #{location}"
        end

        raise
      end
    end

    # Coerces the argument into a URI and verifies that it is http or https.
    #
    # @param url [String, URI]
    # @return [URI::HTTP] (URI::HTTPS is a subclass of URI::HTTP)
    # @raise [Refile::InvalidUrl] if the URL cannot be parsed or uses another
    #   scheme
    def ensure_uri(url)
      begin
        uri = URI(url)
      rescue URI::InvalidURIError
        # Report the offending input; `uri` was never assigned at this point.
        raise Refile::InvalidUrl, "Invalid URI: #{url.inspect}"
      end

      unless uri.is_a?(URI::HTTP)
        raise Refile::InvalidUrl, "URL scheme needs to be http or https: #{uri}"
      end

      uri
    end

    # Prefers the file name announced by the server and falls back to the last
    # segment of the final URL's path.
    #
    # @return [String, nil]
    def extract_original_filename
      filename_from_content_disposition || filename_from_path
    end

    # Extracts the file name from the Content-Disposition response header,
    # trying the UTF-8 extended form first, then the quoted form, then the
    # bare token form.
    #
    # @return [String, nil] nil when the header is absent or names no file
    def filename_from_content_disposition
      content_disposition = @io.meta["content-disposition"].to_s

      # Unquoted values stop at ";" so that a following header parameter
      # (e.g. `filename=a.txt; size=3`) is not glued onto the file name.
      escaped_filename =
        content_disposition[/filename\*=UTF-8''([^;\s]+)/, 1] ||
        content_disposition[/filename="([^"]*)"/, 1] ||
        content_disposition[/filename=([^;\s]+)/, 1]

      filename = CGI.unescape(escaped_filename.to_s)
      filename unless filename.empty?
    end

    # Derives the file name from the last path segment of the URL that was
    # finally downloaded (after redirects).
    #
    # @return [String, nil] nil when the path has no segments
    def filename_from_path
      filename = @io.base_uri.path.split("/").last
      CGI.unescape(filename) if filename
    end
  end
end