lib/down/net_http.rb in down-4.5.0 vs lib/down/net_http.rb in down-4.6.0

- old
+ new

@@ -1,36 +1,45 @@ # frozen-string-literal: true require "open-uri" require "net/https" +require "addressable/uri" require "down/backend" require "tempfile" require "fileutils" -require "cgi" module Down + # Provides streaming downloads implemented with Net::HTTP and open-uri. class NetHttp < Backend + # Initializes the backend with common defaults. def initialize(options = {}) @options = { "User-Agent" => "Down/#{Down::VERSION}", max_redirects: 2, open_timeout: 30, read_timeout: 30, }.merge(options) end + # Downloads a remote file to disk using open-uri. Accepts any open-uri + # options, and a few more. def download(url, options = {}) options = @options.merge(options) max_size = options.delete(:max_size) max_redirects = options.delete(:max_redirects) progress_proc = options.delete(:progress_proc) content_length_proc = options.delete(:content_length_proc) destination = options.delete(:destination) + # Use open-uri's :content_lenth_proc or :progress_proc to raise an + # exception early if the file is too large. + # + # Also disable following redirects, as we'll provide our own + # implementation that has the ability to limit the number of redirects. open_uri_options = { content_length_proc: proc { |size| if size && max_size && size > max_size raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)" end @@ -43,10 +52,11 @@ progress_proc.call(current_size) if progress_proc }, redirect: false, } + # Handle basic authentication in the :proxy option. if options[:proxy] proxy = URI(options.delete(:proxy)) user = proxy.user password = proxy.password @@ -60,42 +70,47 @@ end end open_uri_options.merge!(options) - uri = ensure_uri(url) + uri = ensure_uri(addressable_normalize(url)) + # Handle basic authentication in the remote URL. if uri.user || uri.password open_uri_options[:http_basic_authentication] ||= [uri.user, uri.password] uri.user = nil uri.password = nil end open_uri_file = open_uri(uri, open_uri_options, follows_remaining: max_redirects) + # Handle the fact that open-uri returns StringIOs for small files. tempfile = ensure_tempfile(open_uri_file, File.extname(open_uri_file.base_uri.path)) OpenURI::Meta.init tempfile, open_uri_file # add back open-uri methods tempfile.extend Down::NetHttp::DownloadedFile download_result(tempfile, destination) end + # Starts retrieving the remote file using Net::HTTP and returns an IO-like + # object which downloads the response body on-demand. def open(url, options = {}) + uri = ensure_uri(addressable_normalize(url)) options = @options.merge(options) - uri = ensure_uri(url) - + # Create a Fiber that halts when response headers are received. request = Fiber.new do net_http_request(uri, options) do |response| Fiber.yield response end end response = request.resume response_error!(response) unless response.is_a?(Net::HTTPSuccess) + # Build an IO-like object that will retrieve response body on-demand. Down::ChunkedIO.new( chunks: enum_for(:stream_body, response), size: response["Content-Length"] && response["Content-Length"].to_i, encoding: response.type_params["charset"], rewindable: options.fetch(:rewindable, true), @@ -111,29 +126,38 @@ ) end private + # Calls open-uri's URI::HTTP#open method. Additionally handles redirects. def open_uri(uri, options, follows_remaining: 0) - downloaded_file = uri.open(options) + uri.open(options) rescue OpenURI::HTTPRedirect => exception raise Down::TooManyRedirects, "too many redirects" if follows_remaining == 0 - uri = exception.uri + # fail if redirect URI scheme is not http or https + begin + uri = ensure_uri(exception.uri) + rescue Down::InvalidUrl + response = rebuild_response_from_open_uri_exception(exception) + raise ResponseError.new("Invalid Redirect URI: #{exception.uri}", response: response) + end + + # forward cookies on the redirect if !exception.io.meta["set-cookie"].to_s.empty? options["Cookie"] = exception.io.meta["set-cookie"] end follows_remaining -= 1 retry rescue OpenURI::HTTPError => exception - code, message = exception.io.status - response_class = Net::HTTPResponse::CODE_TO_OBJ.fetch(code) - response = response_class.new(nil, code, message) - exception.io.metas.each do |name, values| - values.each { |value| response.add_field(name, value) } + response = rebuild_response_from_open_uri_exception(exception) + + # open-uri attempts to parse the redirect URI, so we re-raise that exception + if exception.message.include?("(Invalid Location URI)") + raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response) end response_error!(response) rescue => exception request_error!(exception) @@ -157,47 +181,59 @@ tempfile.open tempfile end + # Makes a Net::HTTP request and follows redirects. def net_http_request(uri, options, follows_remaining: options.fetch(:max_redirects, 2), &block) http, request = create_net_http(uri, options) begin response = http.start do http.request(request) do |response| unless response.is_a?(Net::HTTPRedirection) yield response - response.instance_variable_set("@read", true) # mark response as read + # In certain cases the caller wants to download only one portion + # of the file and close the connection, so we tell Net::HTTP that + # it shouldn't continue retrieving it. + response.instance_variable_set("@read", true) end end end rescue => exception request_error!(exception) end if response.is_a?(Net::HTTPRedirection) raise Down::TooManyRedirects if follows_remaining == 0 - location = URI.parse(response["Location"]) + # fail if redirect URI is not a valid http or https URL + begin + location = ensure_uri(response["Location"], allow_relative: true) + rescue Down::InvalidUrl + raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response) + end + + # handle relative redirects location = uri + location if location.relative? net_http_request(location, options, follows_remaining: follows_remaining - 1, &block) end end + # Build a Net::HTTP object for making a request. def create_net_http(uri, options) http_class = Net::HTTP if options[:proxy] proxy = URI(options[:proxy]) http_class = Net::HTTP::Proxy(proxy.hostname, proxy.port, proxy.user, proxy.password) end http = http_class.new(uri.host, uri.port) - # taken from open-uri implementation + # Handle SSL parameters (taken from the open-uri implementation). if uri.is_a?(URI::HTTPS) http.use_ssl = true http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER store = OpenSSL::X509::Store.new if options[:ssl_ca_cert] @@ -212,32 +248,63 @@ http.read_timeout = options[:read_timeout] if options.key?(:read_timeout) http.open_timeout = options[:open_timeout] if options.key?(:open_timeout) request_headers = options.select { |key, value| key.is_a?(String) } - request_headers["Accept-Encoding"] = "" # otherwise FiberError can be raised + request_headers["Accept-Encoding"] = "" # Net::HTTP's inflater causes FiberErrors get = Net::HTTP::Get.new(uri.request_uri, request_headers) get.basic_auth(uri.user, uri.password) if uri.user || uri.password [http, get] end + # Yields chunks of the response body to the block. def stream_body(response, &block) response.read_body(&block) rescue => exception request_error!(exception) end - def ensure_uri(url) - uri = URI(url) - raise Down::InvalidUrl, "URL scheme needs to be http or https" unless uri.is_a?(URI::HTTP) + # Checks that the url is a valid URI and that its scheme is http or https. + def ensure_uri(url, allow_relative: false) + begin + uri = URI(url) + rescue URI::InvalidURIError => exception + raise Down::InvalidUrl, exception.message + end + + unless allow_relative && uri.relative? + raise Down::InvalidUrl, "URL scheme needs to be http or https: #{uri}" unless uri.is_a?(URI::HTTP) + end + uri - rescue URI::InvalidURIError => exception - raise Down::InvalidUrl, exception.message end + # Makes sure that the URL is properly encoded. + def addressable_normalize(url) + addressable_uri = Addressable::URI.parse(url) + addressable_uri.normalize.to_s + end + + # When open-uri raises an exception, it doesn't expose the response object. + # Fortunately, the exception object holds response data that can be used to + # rebuild the Net::HTTP response object. + def rebuild_response_from_open_uri_exception(exception) + code, message = exception.io.status + + response_class = Net::HTTPResponse::CODE_TO_OBJ.fetch(code) + response = response_class.new(nil, code, message) + + exception.io.metas.each do |name, values| + values.each { |value| response.add_field(name, value) } + end + + response + end + + # Raises non-sucessful response as a Down::ResponseError. def response_error!(response) code = response.code.to_i message = response.message.split(" ").map(&:capitalize).join(" ") args = ["#{code} #{message}", response: response] @@ -247,10 +314,11 @@ when 500..599 then raise Down::ServerError.new(*args) else raise Down::ResponseError.new(*args) end end + # Re-raise Net::HTTP exceptions as Down::Error exceptions. def request_error!(exception) case exception when Net::OpenTimeout raise Down::TimeoutError, "timed out waiting for connection to open" when Net::ReadTimeout @@ -262,9 +330,11 @@ else raise exception end end + # Defines some additional attributes for the returned Tempfile (on top of what + # OpenURI::Meta already defines). module DownloadedFile def original_filename Utils.filename_from_content_disposition(meta["content-disposition"]) || Utils.filename_from_path(base_uri.path) end