lib/omnibus/fetchers/net_fetcher.rb in omnibus-3.2.2 vs lib/omnibus/fetchers/net_fetcher.rb in omnibus-4.0.0.beta.1
- old
+ new
@@ -12,222 +12,263 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
-require 'net/http'
-require 'net/https'
-require 'net/ftp'
+require 'fileutils'
+require 'open-uri'
module Omnibus
- class UnsupportedURIScheme < ArgumentError
- end
-
- class InvalidSourceFile < RuntimeError
- end
-
- # Fetcher Implementation for HTTP and FTP hosted tarballs
class NetFetcher < Fetcher
- attr_reader :name
- attr_reader :project_file
- attr_reader :source
- attr_reader :source_uri
- attr_reader :project_dir
-
# Use 7-zip to extract 7z/zip for Windows
WIN_7Z_EXTENSIONS = %w(.7z .zip)
# tar probably has compression scheme linked in, otherwise for tarballs
TAR_EXTENSIONS = %w(.tar .tar.gz .tgz .bz2 .tar.xz .txz)
- def initialize(software)
- @name = software.name
- @checksum = software.checksum
- @source = software.source
- @project_file = software.project_file
- @source_uri = software.source_uri
- @project_dir = software.project_dir
- super
+ #
+ # A fetch is required if the downloaded_file (such as a tarball) does not
+ # exist on disk, or if the checksum of the downloaded file differs from
+ # the given checksum.
+ #
+ # @return [true, false]
+ #
+ def fetch_required?
+ !(File.exist?(downloaded_file) && digest(downloaded_file, :md5) == checksum)
end
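
# Sketch (not in either file): by De Morgan's law, the predicate above is
# equivalent to "fetch when the file is missing OR its md5 differs":
#
#   def fetch_required?
#     !File.exist?(downloaded_file) ||
#       digest(downloaded_file, :md5) != checksum
#   end
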
- def description
- <<-EOH.gsub(/^ {8}/, '').strip
- source URI: #{source_uri}
- checksum: #{@checksum}
- local location: #{@project_file}
- EOH
- end
-
+ #
+ # The version identifier for this remote location. This is computed from
+ # the md5 checksum given in the software definition.
+ #
+ # @return [String]
+ #
def version_guid
- "md5:#{@checksum}"
+ "md5:#{checksum}"
end
- def fetch_required?
- !File.exist?(project_file) || Digest::MD5.file(project_file) != @checksum
- end
-
+ #
+ # Clean the project directory by removing the contents from disk.
+ #
+ # @return [true, false]
+ # true if the project directory was removed, false otherwise
+ #
def clean
if File.exist?(project_dir)
- log.info(log_key) { "Cleaning existing build from #{project_dir}" }
-
+ log.info(log_key) { "Cleaning project directory `#{project_dir}'" }
FileUtils.rm_rf(project_dir)
+ extract
+ true
+ else
+ extract
+ false
end
- extract
end
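
# Sketch of the contract above, with a hypothetical `fetcher` instance:
#
#   fetcher.clean #=> true  (project_dir existed, was wiped, then re-extracted)
#   fetcher.clean #=> false (nothing to wipe; extraction still ran)
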
+ #
+ # Fetch the given software definition. This method **always** fetches the
+ # file, even if it already exists on disk! Use {#fetch_required?} to guard
+ # against unnecessary downloads.
+ #
+ # @return [void]
+ #
def fetch
- if fetch_required?
- download
- verify_checksum!
- else
- log.debug(log_key) { 'Cached copy of source tarball up to date' }
- end
+ log.info(log_key) { "Downloading from `#{download_url}'" }
+
+ create_required_directories
+ download
+ verify_checksum!
+ extract
end
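
# Sketch of the guard recommended above (caller-side, assuming a `fetcher`
# instance of this class):
#
#   fetcher.fetch if fetcher.fetch_required?
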
- def get_with_redirect(url, headers, limit = 10)
- raise ArgumentError, 'HTTP redirect too deep' if limit == 0
- log.info(log_key) { "Getting from #{url} with #{limit} redirects left" }
+ #
+ # The version of this item for use in the cache. This is the md5 of the
+ # downloaded file and the URL from which it was downloaded.
+ #
+ # @return [String]
+ #
+ def version_for_cache
+ "download_url:#{source[:url]}|md5:#{source[:md5]}"
+ end
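
# Sketch with hypothetical values: a source of
# url: 'https://example.com/zlib-1.2.8.tar.gz', md5: 'abc123' yields
#
#   "download_url:https://example.com/zlib-1.2.8.tar.gz|md5:abc123"
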
- url = URI.parse(url) unless url.kind_of?(URI)
+ #
+ # The path on disk to the downloaded asset. This method requires the
+ # presence of a +source :url+ value.
+ #
+ # @return [String]
+ #
+ def downloaded_file
+ filename = File.basename(source[:url], '?*')
+ File.join(Config.cache_dir, filename)
+ end
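
# Sketch: File.basename keeps only the final path segment, so with a
# hypothetical Config.cache_dir of '/var/cache/omnibus/cache':
#
#   File.basename('https://example.com/dist/zlib-1.2.8.tar.gz', '?*')
#     #=> "zlib-1.2.8.tar.gz"
#   downloaded_file
#     #=> "/var/cache/omnibus/cache/zlib-1.2.8.tar.gz"
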
- req = Net::HTTP::Get.new(url.request_uri, headers)
+ #
+ # The checksum (+md5+) as defined by the user in the software definition.
+ #
+ # @return [String]
+ #
+ def checksum
+ source[:md5]
+ end
- http_client = if http_proxy && !excluded_from_proxy?(url.host)
- Net::HTTP::Proxy(http_proxy.host, http_proxy.port, http_proxy.user, http_proxy.password).new(url.host, url.port)
- else
- Net::HTTP.new(url.host, url.port)
- end
- http_client.use_ssl = (url.scheme == 'https')
+ private
- response = http_client.start { |http| http.request(req) }
- case response
- when Net::HTTPSuccess
- open(project_file, 'wb') do |f|
- f.write(response.body)
- end
- when Net::HTTPRedirection
- get_with_redirect(response['location'], headers, limit - 1)
+ #
+ # The URL from which to download the software - this comes from the
+ # software's +source :url+ value.
+ #
+ # If S3 caching is enabled, this is the download URL for the software from
+ # the S3 bucket as defined in the {Config}.
+ #
+ # @return [String]
+ #
+ def download_url
+ if Config.use_s3_caching
+ "http://#{Config.s3_bucket}.s3.amazonaws.com/#{S3Cache.key_for(software)}"
else
- response.error!
+ source[:url]
end
end
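
# Sketch with hypothetical config: if Config.s3_bucket were
# 'my-omnibus-cache' and S3Cache.key_for(software) returned
# 'zlib-1.2.8-abc123', the cached URL would be
#
#   "http://my-omnibus-cache.s3.amazonaws.com/zlib-1.2.8-abc123"
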
- # search environment variable as given, all lowercase and all upper case
- def get_env(name)
- ENV[name] || ENV[name.downcase] || ENV[name.upcase] || nil
- end
+ #
+ # Download the given file using Ruby's +OpenURI+ implementation. This method
+ # may emit warnings as defined in software definitions using the +:warning+
+ # key.
+ #
+ # @return [void]
+ #
+ def download
+ log.warn(log_key) { source[:warning] } if source.key?(:warning)
- # constructs a http_proxy uri from HTTP_PROXY* env vars
- def http_proxy
- @http_proxy ||= begin
- proxy = get_env('HTTP_PROXY') || return
- proxy = "http://#{proxy}" unless proxy =~ /^https?:/
- uri = URI.parse(proxy)
- uri.user ||= get_env('HTTP_PROXY_USER')
- uri.password ||= get_env('HTTP_PROXY_PASS')
- uri
+ headers = download_headers
+
+ if source[:unsafe]
+ log.warn(log_key) { "Permitting unsafe redirects!" }
+ headers[:allow_unsafe_redirects] = true
end
- end
- # return true if the host is excluded from proxying via the no_proxy directive.
- # the 'no_proxy' variable contains a list of host suffixes separated by comma
- # example: example.com,www.examle.org,localhost
- def excluded_from_proxy?(host)
- no_proxy = get_env('no_proxy') || ''
- no_proxy.split(/\s*,\s*/).any? { |pattern| host.end_with? pattern }
+ file = open(download_url, headers)
+ FileUtils.cp(file.path, downloaded_file)
+ file.close
+ rescue SocketError,
+ Errno::ECONNREFUSED,
+ Errno::ECONNRESET,
+ Errno::ENETUNREACH,
+ OpenURI::HTTPError => e
+ log.error(log_key) { "Download failed - #{e.class}!" }
+ raise
end
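
# A standalone sketch of the OpenURI flow above, with a hypothetical URL and
# destination. Kernel#open dispatches to OpenURI on the Rubies this gem
# targeted (Ruby >= 2.5 callers would use URI.open), and IO.copy_stream
# handles both the Tempfile and StringIO objects OpenURI may return:
#
#   require 'open-uri'
#
#   file = open('https://example.com/zlib-1.2.8.tar.gz',
#               'Accept-Encoding' => 'identity')
#   IO.copy_stream(file, '/tmp/zlib-1.2.8.tar.gz')
#   file.close
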
- def download
- tries = 5
- begin
- log.warn(log_key) { source[:warning] } if source.key?(:warning)
- log.info(log_key) { "Fetching #{project_file} from #{source_uri}" }
+ #
+ # Extract the downloaded file, choosing the extraction strategy based on
+ # the file extension. In the rare event the file cannot be extracted, it
+ # is copied over as a raw file.
+ #
+ def extract
+ if command = extract_command
+ log.info(log_key) { "Extracting `#{downloaded_file}' to `#{Config.source_dir}'" }
+ shellout!(command)
+ else
+ log.info(log_key) { "`#{downloaded_file}' is not an archive - copying to `#{project_dir}'" }
- case source_uri.scheme
- when /https?/
- headers = {
- 'accept-encoding' => '',
- }
- if source.key?(:cookie)
- headers['Cookie'] = source[:cookie]
- end
- get_with_redirect(source_uri, headers)
- when 'ftp'
- Net::FTP.open(source_uri.host) do |ftp|
- ftp.passive = true
- ftp.login
- ftp.getbinaryfile(source_uri.path, project_file)
- ftp.close
- end
+ if File.directory?(project_dir)
+ # If the file itself was a directory, copy the whole thing over. This
+ # seems unlikely, because I do not think it is possible to download
+ # a folder, but better safe than sorry.
+ FileUtils.cp_r(downloaded_file, project_dir)
else
- raise UnsupportedURIScheme, "Don't know how to download from #{source_uri}"
+ # In the more likely case that we got a "regular" file, we want that
+ # file to live **inside** the project directory.
+ FileUtils.mkdir_p(project_dir)
+ FileUtils.cp(downloaded_file, "#{project_dir}/")
end
- rescue Exception
- tries -= 1
- if tries != 0
- log.debug(log_key) { "Retrying failed download (#{tries})..." }
- retry
- else
- raise
- end
end
- rescue Exception => e
- ErrorReporter.new(e, self).explain("Failed to fetch source from #source_uri (#{e.class}: #{e.message.strip})")
- raise
end
+ #
+ # Verify the downloaded file has the correct checksum.
+ #
+ # @raise [ChecksumMismatch]
+ # if the checksum does not match
+ #
def verify_checksum!
- actual_md5 = Digest::MD5.file(project_file)
- unless actual_md5 == @checksum
- log.warn(log_key) { "Invalid MD5 for #{@name}" }
- log.warn(log_key) { "Expected: #{@checksum}" }
- log.warn(log_key) { "Actual: #{actual_md5}" }
- raise InvalidSourceFile, "Checksum of downloaded file #{project_file} doesn't match expected"
+ log.info(log_key) { 'Verifying checksum' }
+
+ expected = checksum
+ actual = digest(downloaded_file, :md5)
+
+ if expected != actual
+ raise ChecksumMismatch.new(software, expected, actual)
end
end
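
# Standalone sketch of the comparison above (hypothetical file and checksum;
# Digest::MD5.file is the stdlib equivalent of digest(..., :md5)):
#
#   require 'digest'
#
#   expected = '44d667c142d7cda120332623eab69f40' # hypothetical
#   actual   = Digest::MD5.file('/tmp/zlib-1.2.8.tar.gz').hexdigest
#   raise "checksum mismatch: #{expected} vs #{actual}" if expected != actual
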
- def extract
- log.info(log_key) do
- "Extracting the source in '#{project_file}' to '#{Config.source_dir}'"
- end
+ #
+ # The command to use for extracting this piece of software.
+ #
+ # @return [String, nil]
+ #
+ def extract_command
+ if Ohai['platform'] == 'windows' && downloaded_file.end_with?(*WIN_7Z_EXTENSIONS)
+ "7z.exe x #{windows_safe_path(downloaded_file)} -o#{Config.source_dir} -r -y"
+ elsif Ohai['platform'] != 'windows' && downloaded_file.end_with?('.7z')
+ "7z x #{windows_safe_path(downloaded_file)} -o#{Config.source_dir} -r -y"
+ elsif Ohai['platform'] != 'windows' && downloaded_file.end_with?('.zip')
+ "unzip #{windows_safe_path(downloaded_file)} -d #{Config.source_dir}"
+ elsif downloaded_file.end_with?(*TAR_EXTENSIONS)
+ compression_switch = 'z' if downloaded_file.end_with?('gz')
+ compression_switch = 'j' if downloaded_file.end_with?('bz2')
+ compression_switch = 'J' if downloaded_file.end_with?('xz')
+ compression_switch = '' if downloaded_file.end_with?('tar')
- cmd = extract_cmd
- case cmd
- when Proc
- cmd.call
- when String
- shellout!(cmd)
- else
- raise "Don't know how to extract command for #{cmd.class} class"
+ "#{tar} #{compression_switch}xf #{windows_safe_path(downloaded_file)} -C#{Config.source_dir}"
end
- rescue Exception => e
- ErrorReporter.new(e, self).explain("Failed to unpack archive at #{project_file} (#{e.class}: #{e.message.strip})")
- raise
end
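
# Sketch of a resulting command on Linux for a hypothetical tarball, where
# `tar` resolves to plain tar and Config.source_dir is '/var/cache/omnibus/src':
#
#   extract_command
#     #=> "tar zxf /var/cache/omnibus/cache/zlib-1.2.8.tar.gz -C/var/cache/omnibus/src"
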
- def extract_cmd
- if Ohai['platform'] == 'windows' && project_file.end_with?(*WIN_7Z_EXTENSIONS)
- "7z.exe x #{project_file} -o#{Config.source_dir} -r -y"
- elsif Ohai['platform'] != 'windows' && project_file.end_with?('.7z')
- "7z x #{project_file} -o#{Config.source_dir} -r -y"
- elsif Ohai['platform'] != 'windows' && project_file.end_with?('.zip')
- "unzip #{project_file} -d #{Config.source_dir}"
- elsif project_file.end_with?(*TAR_EXTENSIONS)
- compression_switch = 'z' if project_file.end_with?('gz')
- compression_switch = 'j' if project_file.end_with?('bz2')
- compression_switch = 'J' if project_file.end_with?('xz')
- compression_switch = '' if project_file.end_with?('tar')
- "tar #{compression_switch}xf #{project_file} -C#{Config.source_dir}"
- else
- # if we don't recognize the extension, simply copy over the file
- proc do
- log.debug(log_key) do
- "'#{project_file}' is not an archive. Copying to '#{project_dir}'..."
- end
- # WARNING: hack hack hack, no project dir yet
- FileUtils.mkdir_p(project_dir)
- FileUtils.cp(project_file, project_dir)
- end
+ #
+ # Primitively determine whether we should use gtar or tar to untar a file.
+ # If gtar is present, we will use gtar (AIX). Otherwise, we fall back to tar.
+ #
+ # @return [String]
+ #
+ def tar
+ Omnibus.which('gtar') ? 'gtar' : 'tar'
+ end
+
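# A minimal sketch of a which-style PATH lookup (a hypothetical stand-in for
# Omnibus.which, which is assumed to behave similarly):
#
#   def which(executable)
#     ENV['PATH'].split(File::PATH_SEPARATOR)
#                .map { |dir| File.join(dir, executable) }
#                .find { |path| File.executable?(path) }
#   end
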
+ #
+ # The list of headers to pass to the download.
+ #
+ # @return [Hash]
+ #
+ def download_headers
+ {}.tap do |h|
+ # Alright kids, sit down while grandpa tells you a story. Back when the
+ # Internet was just a series of tubes, and you had to "dial in" using
+ # this thing called a "modem", ancient astronaut theorists (computer
+ # scientists) invented gzip to compress requests sent over said tubes
+ # and make the Internet faster.
+ #
+ # Fast forward to the year of broadband - ungzipping these files was
+ # tedious and hard, so Ruby and other client libraries decided to do it
+ # for you:
+ #
+ # https://github.com/ruby/ruby/blob/c49ae7/lib/net/http.rb#L1031-L1033
+ #
+ # Meanwhile, software manufacturers began automatically compressing
+ # their software for distribution as a +.tar.gz+, publishing the
+ # appropriate checksums accordingly.
+ #
+ # But consider... If a software manufacturer is publishing the checksum
+ # for a gzipped tarball, and the client is automatically ungzipping its
+ # responses, then checksums can (read: should) never match! Herein lies
+ # the bug that took many hours away from the lives of a once-happy
+ # developer.
+ #
+ # TL;DR - Do not let Ruby ungzip our file
+ #
+ h['Accept-Encoding'] = 'identity'
+
+ # Set the cookie if one was given
+ h['Cookie'] = source[:cookie] if source[:cookie]
end
end
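
# Sketch of the resulting hash for a software definition that sets a cookie
# (hypothetical value):
#
#   download_headers
#     #=> { 'Accept-Encoding' => 'identity', 'Cookie' => 'session=abc123' }
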
end
end