modules/mu/groomers/chef.rb in cloud-mu-2.1.0beta vs modules/mu/groomers/chef.rb in cloud-mu-3.0.0beta

- old
+ new

@@ -69,11 +69,11 @@ end } require 'chef/knife/bootstrap_windows_winrm' require 'chef/knife/bootstrap_windows_ssh' ::Chef::Config[:chef_server_url] = "https://#{MU.mu_public_addr}:7443/organizations/#{user}" - if File.exists?("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb") + if File.exist?("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb") MU.log "Loading Chef configuration from #{Etc.getpwnam(mu_user).dir}/.chef/knife.rb", MU::DEBUG ::Chef::Config.from_file("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb") end ::Chef::Config[:environment] = env ::Chef::Config[:yes] = true @@ -216,11 +216,11 @@ # @param vault [String]: A repository of secrets to delete def self.deleteSecret(vault: nil, item: nil) loadChefLib raise MuError, "No vault specified, nothing to delete" if vault.nil? MU.log "Deleting #{vault}:#{item} from vaults" - knife_db = nil + knife_cmds = [] if item.nil? knife_cmds << ::Chef::Knife::DataBagDelete.new(['data', 'bag', 'delete', vault]) else knife_cmds << ::Chef::Knife::DataBagDelete.new(['data', 'bag', 'delete', vault, item]) @@ -268,11 +268,11 @@ end saveDeployData retries = 0 try_upgrade = false - output = [] + output_lines = [] error_signal = "CHEF EXITED BADLY: "+(0...25).map { ('a'..'z').to_a[rand(26)] }.join runstart = nil cmd = nil ssh = nil winrm = nil @@ -292,13 +292,30 @@ cmd = "#{upgrade_cmd} chef-client --color || echo #{error_signal}" end Timeout::timeout(timeout) { retval = ssh.exec!(cmd) { |ch, stream, data| puts data - output << data - raise MU::Cloud::BootstrapTempFail if data.match(/REBOOT_SCHEDULED| WARN: Reboot requested:/) - raise MU::Groomer::RunError, output.grep(/ ERROR: /).last if data.match(/#{error_signal}/) + output_lines << data + raise MU::Cloud::BootstrapTempFail if data.match(/REBOOT_SCHEDULED| WARN: Reboot requested:|Rebooting server at a recipe's request|Chef::Exceptions::Reboot/) + if data.match(/#{error_signal}/) + error_msg = "" + clip = false + output_lines.each { |chunk| + chunk.split(/\n/).each { |line| + if !clip and line.match(/^========+/) + clip = true + elsif clip and line.match(/^Running handlers:/) + break + end + + if clip and line.match(/[a-z0-9]/) + error_msg += line.gsub(/\e\[(\d+)m/, '')+"\n" + end + } + } + raise MU::Groomer::RunError, error_msg + end } } else MU.log "Invoking Chef over WinRM on #{@server.mu_name}: #{purpose}" winrm = @server.getWinRMSession(haveBootstrapped? ? 1 : max_retries) @@ -312,34 +329,34 @@ raise MU::Groomer::RunError, "#{@server.mu_name} has a pending reboot" end if try_upgrade pp winrm.run("Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 | Invoke-Expression; Install-Project -version:#{MU.chefVersion} -download_directory:$HOME") end - output = [] + output_lines = [] cmd = "c:/opscode/chef/bin/chef-client.bat --color" if override_runlist cmd = cmd + " -o '#{override_runlist}'" end resp = nil Timeout::timeout(timeout) { resp = winrm.run(cmd) do |stdout, stderr| if stdout print stdout if output - output << stdout + output_lines << stdout end if stderr MU.log stderr, MU::ERR - output << stderr + output_lines << stderr end end } - if resp.exitcode == 1 and output.join("\n").match(/Chef Client finished/) + if resp.exitcode == 1 and output_lines.join("\n").match(/Chef Client finished/) MU.log "resp.exit code 1" elsif resp.exitcode != 0 - raise MU::Cloud::BootstrapTempFail if resp.exitcode == 35 or output.join("\n").match(/REBOOT_SCHEDULED| WARN: Reboot requested:/) - raise MU::Groomer::RunError, output.slice(output.length-50, output.length).join("") + raise MU::Cloud::BootstrapTempFail if resp.exitcode == 35 or output_lines.join("\n").match(/REBOOT_SCHEDULED| WARN: Reboot requested:|Rebooting server at a recipe's request|Chef::Exceptions::Reboot/) + raise MU::Groomer::RunError, output_lines.slice(output_lines.length-50, output_lines.length).join("") end end rescue MU::Cloud::BootstrapTempFail MU.log "#{@server.mu_name} rebooting from Chef, waiting then resuming", MU::NOTICE @@ -395,14 +412,16 @@ end end sleep 30 retry else + @server.deploy.sendAdminSlack("Chef run '#{purpose}' failed on `#{@server.mu_name}` :crying_cat_face:", msg: e.message) raise MU::Groomer::RunError, "#{@server.mu_name}: Chef run '#{purpose}' failed #{max_retries} times, last error was: #{e.message}" end rescue Exception => e - raise MU::Groomer::RunError, "Caught unexpected #{e.inspect} on #{@server.mu_name} in @groomer.run" + @server.deploy.sendAdminSlack("Chef run '#{purpose}' failed on `#{@server.mu_name}` :crying_cat_face:", msg: e.inspect) + raise MU::Groomer::RunError, "Caught unexpected #{e.inspect} on #{@server.mu_name} in @groomer.run at #{e.backtrace[0]}" end saveDeployData end @@ -438,24 +457,37 @@ else remove_cmd = "sudo yum -y erase chef ; sudo rpm -e chef ; sudo rm -rf /var/chef/ /etc/chef /opt/chef/ /usr/bin/chef-* ; sudo apt-get -y remove chef ; sudo touch /opt/mu_installed_chef" end guardfile = "/opt/mu_installed_chef" - ssh = @server.getSSHSession(15) - if leave_ours - MU.log "Expunging pre-existing Chef install on #{@server.mu_name}, if we didn't create it", MU::NOTICE - begin - ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}) - rescue IOError => e - # TO DO - retry this in a cleaner way - MU.log "Got #{e.inspect} while trying to clean up chef, retrying", MU::NOTICE, details: %Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}} - ssh = @server.getSSHSession(15) - ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}) + retries = 0 + begin + ssh = @server.getSSHSession(15) + Timeout::timeout(60) { + if leave_ours + MU.log "Expunging pre-existing Chef install on #{@server.mu_name}, if we didn't create it", MU::NOTICE + begin + ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}) + rescue IOError => e + # TO DO - retry this in a cleaner way + MU.log "Got #{e.inspect} while trying to clean up chef, retrying", MU::NOTICE, details: %Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}} + ssh = @server.getSSHSession(15) + ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}) + end + else + MU.log "Expunging pre-existing Chef install on #{@server.mu_name}", MU::NOTICE + ssh.exec!(remove_cmd) + end + } + rescue Timeout::Error + if retries < 5 + retries += 1 + sleep 5 + retry + else + raise MuError, "Failed to preClean #{@server.mu_name} after repeated timeouts" end - else - MU.log "Expunging pre-existing Chef install on #{@server.mu_name}", MU::NOTICE - ssh.exec!(remove_cmd) end ssh.close else remove_cmd = %Q{ @@ -521,10 +553,11 @@ # Bootstrap our server with Chef def bootstrap self.class.loadChefLib stashHostSSLCertSecret + splunkVaultInit if !@config['cleaned_chef'] begin leave_ours = @config['scrub_groomer'] ? false : true preClean(leave_ours) rescue RuntimeError => e @@ -652,12 +685,12 @@ rescue SystemExit => e MU.log "#{@server.mu_name}: Run list removal of recipe[#{recipe}] failed with #{e.inspect}", MU::WARN end } knifeAddToRunList("role[mu-node]") + knifeAddToRunList("mu-tools::selinux") - splunkVaultInit grantSecretAccess(@server.mu_name, "windows_credentials") if @server.windows? grantSecretAccess(@server.mu_name, "ssl_cert") saveChefMetadata knifeAddToRunList("recipe[mu-tools::updates]") if !@config['skipinitialupdates'] @@ -667,10 +700,11 @@ run(purpose: "Base Windows configuration", update_runlist: false, max_retries: 20) elsif !@config['skipinitialupdates'] run(purpose: "Base configuration", update_runlist: false, max_retries: 20) end ::Chef::Knife.run(['node', 'run_list', 'remove', @server.mu_name, "recipe[mu-tools::updates]"], {}) if !@config['skipinitialupdates'] + ::Chef::Knife.run(['node', 'run_list', 'remove', @server.mu_name, "recipe[mu-tools::selinux]"], {}) # This will deal with Active Directory integration. if !@config['active_directory'].nil? if @config['active_directory']['domain_operation'] == "join" knifeAddToRunList("recipe[mu-activedirectory::domain-node]") @@ -694,10 +728,15 @@ # Synchronize the deployment structure managed by {MU::MommaCat} to Chef, # so that nodes can access this metadata. # @return [Hash]: The data synchronized. def saveDeployData self.class.loadChefLib + if !haveBootstrapped? + MU.log "saveDeployData invoked on #{@server.to_s} before Chef has been bootstrapped!", MU::WARN, details: caller + return + end + @server.describe(update_cache: true) # Make sure we're fresh saveChefMetadata begin chef_node = ::Chef::Node.load(@server.mu_name) @@ -722,14 +761,16 @@ nodes_to_delete.each { |name| @server.deploy.deployment['servers'][node_class].delete(name) } end - if chef_node.normal['deployment'] != @server.deploy.deployment + if !@server.deploy.deployment.nil? and + (chef_node.normal['deployment'].nil? or + (chef_node.normal['deployment'].to_h <=> @server.deploy.deployment) != 0 + ) MU.log "Updating node: #{@server.mu_name} deployment attributes", details: @server.deploy.deployment chef_node.normal['deployment'].merge!(@server.deploy.deployment) - chef_node.normal['deployment']['ssh_public_key'] = @server.deploy.ssh_public_key chef_node.save end return chef_node['deployment'] rescue Net::HTTPServerException => e MU.log "Attempted to save deployment to Chef node #{@server.mu_name} before it was bootstrapped.", MU::DEBUG @@ -768,19 +809,28 @@ begin knife_cd.run rescue Net::HTTPServerException end end + MU.log "knife data bag delete #{node}" + if !noop + knife_cd = ::Chef::Knife::ClientDelete.new(['data', 'bag', 'delete', node]) + knife_cd.config[:yes] = true + begin + knife_cd.run + rescue Net::HTTPServerException + end + end return if nodeonly begin deleteSecret(vault: node) if !noop rescue MuNoSuchSecret end ["crt", "key", "csr"].each { |ext| - if File.exists?("#{MU.mySSLDir}/#{node}.#{ext}") + if File.exist?("#{MU.mySSLDir}/#{node}.#{ext}") MU.log "Removing #{MU.mySSLDir}/#{node}.#{ext}" File.unlink("#{MU.mySSLDir}/#{node}.#{ext}") if !noop end } end @@ -810,19 +860,21 @@ MU.log "Saving #{@server.mu_name} Chef artifacts" begin chef_node = ::Chef::Node.load(@server.mu_name) rescue Net::HTTPServerException + @server.deploy.sendAdminSlack("Couldn't load Chef metadata on `#{@server.mu_name}` :crying_cat_face:") raise MU::Groomer::RunError, "Couldn't load Chef node #{@server.mu_name}" end # Figure out what this node thinks its name is system_name = chef_node['fqdn'] if !chef_node['fqdn'].nil? MU.log "#{@server.mu_name} local name is #{system_name}", MU::DEBUG chef_node.normal.app = @config['application_cookbook'] if !@config['application_cookbook'].nil? chef_node.normal["service_name"] = @config["name"] + chef_node.normal["credentials"] = @config["credentials"] chef_node.normal["windows_admin_username"] = @config['windows_admin_username'] chef_node.chef_environment = MU.environment.downcase if @server.config['cloud'] == "AWS" chef_node.normal["ec2"] = MU.structToHash(@server.cloud_desc) end @@ -963,40 +1015,40 @@ self.class.loadChefLib return if rl_entry.nil? and multiple.size == 0 if multiple.size == 0 multiple = [rl_entry] end - multiple.each { |rl_entry| - if !rl_entry.match(/^role|recipe\[/) - rl_entry = "#{type}[#{rl_entry}]" + multiple.each { |entry| + if !entry.match(/^role|recipe\[/) + entry = "#{type}[#{entry}]" end } if !ignore_missing role_list = nil recipe_list = nil missing = false - multiple.each { |rl_entry| - # Rather than argue about whether to expect a bare rl_entry name or - # require rl_entry[rolename], let's just accomodate. - if rl_entry.match(/^role\[(.+?)\]/) - rl_entry_name = Regexp.last_match(1) + multiple.each { |entry| + # Rather than argue about whether to expect a bare entry name or + # require entry[rolename], let's just accomodate. + if entry.match(/^role\[(.+?)\]/) + entry_name = Regexp.last_match(1) if role_list.nil? query=%Q{#{MU::Groomer::Chef.knife} role list}; role_list = %x{#{query}} end - if !role_list.match(/(^|\n)#{rl_entry_name}($|\n)/) - MU.log "Attempting to add non-existent #{rl_entry} to #{@server.mu_name}", MU::WARN + if !role_list.match(/(^|\n)#{entry_name}($|\n)/) + MU.log "Attempting to add non-existent #{entry} to #{@server.mu_name}", MU::WARN missing = true end - elsif rl_entry.match(/^recipe\[(.+?)\]/) - rl_entry_name = Regexp.last_match(1) + elsif entry.match(/^recipe\[(.+?)\]/) + entry_name = Regexp.last_match(1) if recipe_list.nil? query=%Q{#{MU::Groomer::Chef.knife} recipe list}; recipe_list = %x{#{query}} end - if !recipe_list.match(/(^|\n)#{rl_entry_name}($|\n)/) - MU.log "Attempting to add non-existent #{rl_entry} to #{@server.mu_name}", MU::WARN + if !recipe_list.match(/(^|\n)#{entry_name}($|\n)/) + MU.log "Attempting to add non-existent #{entry} to #{@server.mu_name}", MU::WARN missing = true end end if missing and !ignore_missing