# # Copyright 2012-2014 Chef Software, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # require 'fileutils' require 'open-uri' require 'ruby-progressbar' module Omnibus class NetFetcher < Fetcher # Use 7-zip to extract 7z/zip for Windows WIN_7Z_EXTENSIONS = %w(.7z .zip) # tar probably has compression scheme linked in, otherwise for tarballs COMPRESSED_TAR_EXTENSIONS = %w(.tar.gz .tgz tar.bz2 .tar.xz .txz .tar.lzma) TAR_EXTENSIONS = COMPRESSED_TAR_EXTENSIONS + ['.tar'] ALL_EXTENSIONS = WIN_7Z_EXTENSIONS + TAR_EXTENSIONS # Digest types used for verifying file checksums DIGESTS = [:sha512, :sha256, :sha1, :md5] # # A fetch is required if the downloaded_file (such as a tarball) does not # exist on disk, or if the checksum of the downloaded file is different # than the given checksum. # # @return [true, false] # def fetch_required? !(File.exist?(downloaded_file) && digest(downloaded_file, digest_type) == checksum) end # # The version identifier for this remote location. This is computed using # the name of the software, the version of the software, and the checksum. # # @return [String] # def version_guid "#{digest_type}:#{checksum}" end # # Clean the project directory if it exists and actually extract # the downloaded file. 
# # @return [true, false] # true if the project directory was removed, false otherwise # def clean needs_cleaning = File.exist?(project_dir) if needs_cleaning log.info(log_key) { "Cleaning project directory `#{project_dir}'" } FileUtils.rm_rf(project_dir) end create_required_directories deploy needs_cleaning end # # Fetch the given software definition. This method **always** fetches the # file, even if it already exists on disk! You should use {#fetch_required?} # to guard against this check in your implementation. # # @return [void] # def fetch log.info(log_key) { "Downloading from `#{download_url}'" } create_required_directories download verify_checksum! end # # The version for this item in the cache. This is the digest of downloaded # file and the URL where it was downloaded from. # # This method is called *before* clean but *after* fetch. Do not ever # use the contents of the project_dir here. # # @return [String] # def version_for_cache "download_url:#{source[:url]}|#{digest_type}:#{checksum}" end # # Returned the resolved version for the manifest. Since this is a # remote URL, there is no resolution, the version is what we said # it is. # # @return [String] # def self.resolve_version(version, source) version end # # The path on disk to the downloaded asset. This method requires the # presence of a +source_uri+. # # @return [String] # def downloaded_file filename = File.basename(source[:url], '?*') File.join(Config.cache_dir, filename) end # # The checksum as defined by the user in the software definition. # # @return [String] # def checksum source[digest_type] end private # # The URL from which to download the software - this comes from the # software's +source :url+ value. # # If S3 caching is enabled, this is the download URL for the software from # the S3 bucket as defined in the {Config}. 
# # @return [String] # def download_url if Config.use_s3_caching "http://#{Config.s3_bucket}.s3.amazonaws.com/#{S3Cache.key_for(self)}" else source[:url] end end # # Download the given file using Ruby's +OpenURI+ implementation. This method # may emit warnings as defined in software definitions using the +:warning+ # key. # # @return [void] # def download log.warn(log_key) { source[:warning] } if source.key?(:warning) options = download_headers if source[:unsafe] log.warn(log_key) { "Permitting unsafe redirects!" } options[:allow_unsafe_redirects] = true end options[:read_timeout] = Omnibus::Config.fetcher_read_timeout fetcher_retries ||= Omnibus::Config.fetcher_retries progress_bar = ProgressBar.create( output: $stdout, format: '%e %B %p%% (%r KB/sec)', rate_scale: ->(rate) { rate / 1024 }, ) reported_total = 0 options[:content_length_proc] = ->(total) { reported_total = total progress_bar.total = total } options[:progress_proc] = ->(step) { downloaded_amount = [step, reported_total].min progress_bar.progress = downloaded_amount } file = open(download_url, options) # This is a temporary file. Close and flush it before attempting to copy # it over. file.close FileUtils.cp(file.path, downloaded_file) file.unlink rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ENETUNREACH, Timeout::Error, OpenURI::HTTPError => e if fetcher_retries != 0 log.info(log_key) { "Retrying failed download due to #{e} (#{fetcher_retries} retries left)..." } fetcher_retries -= 1 retry else log.error(log_key) { "Download failed - #{e.class}!" } raise end end # # Extract the downloaded file, using the magical logic based off of the # ending file extension. In the rare event the file cannot be extracted, it # is copied over as a raw file. 
# def deploy if downloaded_file.end_with?(*ALL_EXTENSIONS) log.info(log_key) { "Extracting `#{safe_downloaded_file}' to `#{safe_project_dir}'" } extract else log.info(log_key) { "`#{safe_downloaded_file}' is not an archive - copying to `#{safe_project_dir}'" } if File.directory?(downloaded_file) # If the file itself was a directory, copy the whole thing over. This # seems unlikely, because I do not think it is a possible to download # a folder, but better safe than sorry. FileUtils.cp_r("#{downloaded_file}/.", project_dir) else # In the more likely case that we got a "regular" file, we want that # file to live **inside** the project directory. project_dir should already # exist due to create_required_directories FileUtils.cp(downloaded_file, project_dir) end end end # # Extracts the downloaded archive file into project_dir. # # On windows, this is a fuster cluck and we allow users to specify the # preferred extractor to be used. The default is to use tar. User overrides # can be set in source[:extract] as: # :tar - use tar.exe and fail on errors (default strategy). # :seven_zip - use 7zip for all tar/compressed tar files on windows. # :lax_tar - use tar.exe on windows but ignore errors. # # Both 7z and bsdtar have issues on windows. # # 7z cannot extract and untar at the same time. You need to extract to a # temporary location and then extract again into project_dir. # # 7z also doesn't handle symlinks well. A symlink to a non-existent # location simply results in a text file with the target path written in # it. It does this without throwing any errors. # # bsdtar will exit(1) if it is encounters symlinks on windows. So we can't # use shellout! directly. # # bsdtar will also exit(1) and fail to overwrite files at the destination # during extraction if a file already exists at the destination and is # marked read-only. This used to be a problem when we weren't properly # cleaning an existing project_dir. It should be less of a problem now... # but who knows. 
    #
    def extract
      # Only used by tar: pick the decompression flag from the extension.
      # The 7z/zip code paths below ignore this value.
      compression_switch = ''
      compression_switch = 'z' if downloaded_file.end_with?('gz')
      compression_switch = '--lzma -' if downloaded_file.end_with?('lzma')
      compression_switch = 'j' if downloaded_file.end_with?('bz2')
      compression_switch = 'J' if downloaded_file.end_with?('xz')

      if Ohai['platform'] == 'windows'
        if downloaded_file.end_with?(*TAR_EXTENSIONS) && source[:extract] != :seven_zip
          # Default windows strategy: tar.exe. With :lax_tar we additionally
          # accept exit(1), which bsdtar returns on symlinks (see comment above).
          returns = [0]
          returns << 1 if source[:extract] == :lax_tar

          shellout!("tar.exe #{compression_switch}xf #{safe_downloaded_file} -C#{safe_project_dir}", returns: returns)
        elsif downloaded_file.end_with?(*COMPRESSED_TAR_EXTENSIONS)
          # 7z cannot decompress and untar in a single pass: decompress into
          # a temporary directory first, then extract the inner tarball.
          Dir.mktmpdir do |temp_dir|
            log.debug(log_key) { "Temporarily extracting `#{safe_downloaded_file}' to `#{temp_dir}'" }

            shellout!("7z.exe x #{safe_downloaded_file} -o#{windows_safe_path(temp_dir)} -r -y")

            # foo.tar.gz -> foo.tar; foo.tgz/foo.txz -> foo, so re-append ".tar"
            fname = File.basename(downloaded_file, File.extname(downloaded_file))
            fname << ".tar" if downloaded_file.end_with?('tgz', 'txz')
            next_file = windows_safe_path(File.join(temp_dir, fname))

            log.debug(log_key) { "Temporarily extracting `#{next_file}' to `#{safe_project_dir}'" }

            shellout!("7z.exe x #{next_file} -o#{safe_project_dir} -r -y")
          end
        else
          shellout!("7z.exe x #{safe_downloaded_file} -o#{safe_project_dir} -r -y")
        end
      elsif downloaded_file.end_with?('.7z')
        shellout!("7z x #{safe_downloaded_file} -o#{safe_project_dir} -r -y")
      elsif downloaded_file.end_with?('.zip')
        shellout!("unzip #{safe_downloaded_file} -d #{safe_project_dir}")
      else
        shellout!("#{tar} #{compression_switch}xf #{safe_downloaded_file} -C#{safe_project_dir}")
      end
    end

    #
    # The digest type defined in the software definition
    #
    # @raise [ChecksumMissing]
    #   if the checksum does not exist
    #
    # @return [Symbol]
    #
    def digest_type
      # DIGESTS is ordered strongest-first, so the strongest digest present
      # in the software definition wins.
      DIGESTS.each do |digest|
        return digest if source.key? digest
      end
      raise ChecksumMissing.new(self)
    end

    #
    # Verify the downloaded file has the correct checksum.
    #
    # @raise [ChecksumMismatch]
    #   if the checksum does not match
    #
    def verify_checksum!
      log.info(log_key) { 'Verifying checksum' }

      expected = checksum
      actual = digest(downloaded_file, digest_type)

      if expected != actual
        raise ChecksumMismatch.new(self, expected, actual)
      end
    end

    #
    # The project directory, passed through +windows_safe_path+ so it is
    # usable in shelled-out commands on Windows.
    #
    # @return [String]
    #
    def safe_project_dir
      windows_safe_path(project_dir)
    end

    #
    # The downloaded file path, passed through +windows_safe_path+ so it is
    # usable in shelled-out commands on Windows.
    #
    # @return [String]
    #
    def safe_downloaded_file
      windows_safe_path(downloaded_file)
    end

    #
    # The command to use for extracting this piece of software.
    #
    # NOTE(review): this method has an empty body and always returns +nil+;
    # it looks like a leftover stub — confirm nothing calls it before removing.
    #
    # @return [[String]]
    #
    def extract_command
    end

    #
    # Primitively determine whether we should use gtar or tar to untar a file.
    # If gtar is present, we will use gtar (AIX). Otherwise, we fallback to tar.
    #
    # @return [String]
    #
    def tar
      Omnibus.which('gtar') ? 'gtar' : 'tar'
    end

    #
    # The list of headers to pass to the download.
    #
    # @return [Hash]
    #
    def download_headers
      {}.tap do |h|
        # Alright kids, sit down while grandpa tells you a story. Back when the
        # Internet was just a series of tubes, and you had to "dial in" using
        # this thing called a "modem", ancient astronaut theorists (computer
        # scientists) invented gzip to compress requests sent over said tubes
        # and make the Internet faster.
        #
        # Fast forward to the year of broadband - ungzipping these files was
        # tedious and hard, so Ruby and other client libraries decided to do it
        # for you:
        #
        #   https://github.com/ruby/ruby/blob/c49ae7/lib/net/http.rb#L1031-L1033
        #
        # Meanwhile, software manufacturers began automatically compressing
        # their software for distribution as a +.tar.gz+, publishing the
        # appropriate checksums accordingly.
        #
        # But consider... If a software manufacturer is publishing the checksum
        # for a gzipped tarball, and the client is automatically ungzipping its
        # responses, then checksums can (read: should) never match! Herein lies
        # the bug that took many hours away from the lives of a once-happy
        # developer.
        #
        # TL;DR - Do not let Ruby ungzip our file
        #
        h['Accept-Encoding'] = 'identity'

        # Set the cookie if one was given
        h['Cookie'] = source[:cookie] if source[:cookie]
      end
    end
  end
end