modules/mu/groomers/chef.rb in cloud-mu-2.1.0beta vs modules/mu/groomers/chef.rb in cloud-mu-3.0.0beta
- old
+ new
@@ -69,11 +69,11 @@
end
}
require 'chef/knife/bootstrap_windows_winrm'
require 'chef/knife/bootstrap_windows_ssh'
::Chef::Config[:chef_server_url] = "https://#{MU.mu_public_addr}:7443/organizations/#{user}"
- if File.exists?("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb")
+ if File.exist?("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb")
MU.log "Loading Chef configuration from #{Etc.getpwnam(mu_user).dir}/.chef/knife.rb", MU::DEBUG
::Chef::Config.from_file("#{Etc.getpwnam(mu_user).dir}/.chef/knife.rb")
end
::Chef::Config[:environment] = env
::Chef::Config[:yes] = true
@@ -216,11 +216,11 @@
# @param vault [String]: A repository of secrets to delete
def self.deleteSecret(vault: nil, item: nil)
loadChefLib
raise MuError, "No vault specified, nothing to delete" if vault.nil?
MU.log "Deleting #{vault}:#{item} from vaults"
- knife_db = nil
+
knife_cmds = []
if item.nil?
knife_cmds << ::Chef::Knife::DataBagDelete.new(['data', 'bag', 'delete', vault])
else
knife_cmds << ::Chef::Knife::DataBagDelete.new(['data', 'bag', 'delete', vault, item])
@@ -268,11 +268,11 @@
end
saveDeployData
retries = 0
try_upgrade = false
- output = []
+ output_lines = []
error_signal = "CHEF EXITED BADLY: "+(0...25).map { ('a'..'z').to_a[rand(26)] }.join
runstart = nil
cmd = nil
ssh = nil
winrm = nil
@@ -292,13 +292,30 @@
cmd = "#{upgrade_cmd} chef-client --color || echo #{error_signal}"
end
Timeout::timeout(timeout) {
retval = ssh.exec!(cmd) { |ch, stream, data|
puts data
- output << data
- raise MU::Cloud::BootstrapTempFail if data.match(/REBOOT_SCHEDULED| WARN: Reboot requested:/)
- raise MU::Groomer::RunError, output.grep(/ ERROR: /).last if data.match(/#{error_signal}/)
+ output_lines << data
+ raise MU::Cloud::BootstrapTempFail if data.match(/REBOOT_SCHEDULED| WARN: Reboot requested:|Rebooting server at a recipe's request|Chef::Exceptions::Reboot/)
+ if data.match(/#{error_signal}/)
+ error_msg = ""
+ clip = false
+ output_lines.each { |chunk|
+ chunk.split(/\n/).each { |line|
+ if !clip and line.match(/^========+/)
+ clip = true
+ elsif clip and line.match(/^Running handlers:/)
+ break
+ end
+
+ if clip and line.match(/[a-z0-9]/)
+ error_msg += line.gsub(/\e\[(\d+)m/, '')+"\n"
+ end
+ }
+ }
+ raise MU::Groomer::RunError, error_msg
+ end
}
}
else
MU.log "Invoking Chef over WinRM on #{@server.mu_name}: #{purpose}"
winrm = @server.getWinRMSession(haveBootstrapped? ? 1 : max_retries)
@@ -312,34 +329,34 @@
raise MU::Groomer::RunError, "#{@server.mu_name} has a pending reboot"
end
if try_upgrade
pp winrm.run("Invoke-WebRequest -useb https://omnitruck.chef.io/install.ps1 | Invoke-Expression; Install-Project -version:#{MU.chefVersion} -download_directory:$HOME")
end
- output = []
+ output_lines = []
cmd = "c:/opscode/chef/bin/chef-client.bat --color"
if override_runlist
cmd = cmd + " -o '#{override_runlist}'"
end
resp = nil
Timeout::timeout(timeout) {
resp = winrm.run(cmd) do |stdout, stderr|
if stdout
print stdout if output
- output << stdout
+ output_lines << stdout
end
if stderr
MU.log stderr, MU::ERR
- output << stderr
+ output_lines << stderr
end
end
}
- if resp.exitcode == 1 and output.join("\n").match(/Chef Client finished/)
+ if resp.exitcode == 1 and output_lines.join("\n").match(/Chef Client finished/)
MU.log "resp.exit code 1"
elsif resp.exitcode != 0
- raise MU::Cloud::BootstrapTempFail if resp.exitcode == 35 or output.join("\n").match(/REBOOT_SCHEDULED| WARN: Reboot requested:/)
- raise MU::Groomer::RunError, output.slice(output.length-50, output.length).join("")
+ raise MU::Cloud::BootstrapTempFail if resp.exitcode == 35 or output_lines.join("\n").match(/REBOOT_SCHEDULED| WARN: Reboot requested:|Rebooting server at a recipe's request|Chef::Exceptions::Reboot/)
+ raise MU::Groomer::RunError, output_lines.slice(output_lines.length-50, output_lines.length).join("")
end
end
rescue MU::Cloud::BootstrapTempFail
MU.log "#{@server.mu_name} rebooting from Chef, waiting then resuming", MU::NOTICE
@@ -395,14 +412,16 @@
end
end
sleep 30
retry
else
+ @server.deploy.sendAdminSlack("Chef run '#{purpose}' failed on `#{@server.mu_name}` :crying_cat_face:", msg: e.message)
raise MU::Groomer::RunError, "#{@server.mu_name}: Chef run '#{purpose}' failed #{max_retries} times, last error was: #{e.message}"
end
rescue Exception => e
- raise MU::Groomer::RunError, "Caught unexpected #{e.inspect} on #{@server.mu_name} in @groomer.run"
+ @server.deploy.sendAdminSlack("Chef run '#{purpose}' failed on `#{@server.mu_name}` :crying_cat_face:", msg: e.inspect)
+ raise MU::Groomer::RunError, "Caught unexpected #{e.inspect} on #{@server.mu_name} in @groomer.run at #{e.backtrace[0]}"
end
saveDeployData
end
@@ -438,24 +457,37 @@
else
remove_cmd = "sudo yum -y erase chef ; sudo rpm -e chef ; sudo rm -rf /var/chef/ /etc/chef /opt/chef/ /usr/bin/chef-* ; sudo apt-get -y remove chef ; sudo touch /opt/mu_installed_chef"
end
guardfile = "/opt/mu_installed_chef"
- ssh = @server.getSSHSession(15)
- if leave_ours
- MU.log "Expunging pre-existing Chef install on #{@server.mu_name}, if we didn't create it", MU::NOTICE
- begin
- ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}})
- rescue IOError => e
- # TO DO - retry this in a cleaner way
- MU.log "Got #{e.inspect} while trying to clean up chef, retrying", MU::NOTICE, details: %Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}
- ssh = @server.getSSHSession(15)
- ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}})
+ retries = 0
+ begin
+ ssh = @server.getSSHSession(15)
+ Timeout::timeout(60) {
+ if leave_ours
+ MU.log "Expunging pre-existing Chef install on #{@server.mu_name}, if we didn't create it", MU::NOTICE
+ begin
+ ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}})
+ rescue IOError => e
+ # TO DO - retry this in a cleaner way
+ MU.log "Got #{e.inspect} while trying to clean up chef, retrying", MU::NOTICE, details: %Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}}
+ ssh = @server.getSSHSession(15)
+ ssh.exec!(%Q{test -f #{guardfile} || (#{remove_cmd}) ; touch #{guardfile}})
+ end
+ else
+ MU.log "Expunging pre-existing Chef install on #{@server.mu_name}", MU::NOTICE
+ ssh.exec!(remove_cmd)
+ end
+ }
+ rescue Timeout::Error
+ if retries < 5
+ retries += 1
+ sleep 5
+ retry
+ else
+ raise MuError, "Failed to preClean #{@server.mu_name} after repeated timeouts"
end
- else
- MU.log "Expunging pre-existing Chef install on #{@server.mu_name}", MU::NOTICE
- ssh.exec!(remove_cmd)
end
ssh.close
else
remove_cmd = %Q{
@@ -521,10 +553,11 @@
# Bootstrap our server with Chef
def bootstrap
self.class.loadChefLib
stashHostSSLCertSecret
+ splunkVaultInit
if !@config['cleaned_chef']
begin
leave_ours = @config['scrub_groomer'] ? false : true
preClean(leave_ours)
rescue RuntimeError => e
@@ -652,12 +685,12 @@
rescue SystemExit => e
MU.log "#{@server.mu_name}: Run list removal of recipe[#{recipe}] failed with #{e.inspect}", MU::WARN
end
}
knifeAddToRunList("role[mu-node]")
+ knifeAddToRunList("mu-tools::selinux")
- splunkVaultInit
grantSecretAccess(@server.mu_name, "windows_credentials") if @server.windows?
grantSecretAccess(@server.mu_name, "ssl_cert")
saveChefMetadata
knifeAddToRunList("recipe[mu-tools::updates]") if !@config['skipinitialupdates']
@@ -667,10 +700,11 @@
run(purpose: "Base Windows configuration", update_runlist: false, max_retries: 20)
elsif !@config['skipinitialupdates']
run(purpose: "Base configuration", update_runlist: false, max_retries: 20)
end
::Chef::Knife.run(['node', 'run_list', 'remove', @server.mu_name, "recipe[mu-tools::updates]"], {}) if !@config['skipinitialupdates']
+ ::Chef::Knife.run(['node', 'run_list', 'remove', @server.mu_name, "recipe[mu-tools::selinux]"], {})
# This will deal with Active Directory integration.
if !@config['active_directory'].nil?
if @config['active_directory']['domain_operation'] == "join"
knifeAddToRunList("recipe[mu-activedirectory::domain-node]")
@@ -694,10 +728,15 @@
# Synchronize the deployment structure managed by {MU::MommaCat} to Chef,
# so that nodes can access this metadata.
# @return [Hash]: The data synchronized.
def saveDeployData
self.class.loadChefLib
+ if !haveBootstrapped?
+ MU.log "saveDeployData invoked on #{@server.to_s} before Chef has been bootstrapped!", MU::WARN, details: caller
+ return
+ end
+
@server.describe(update_cache: true) # Make sure we're fresh
saveChefMetadata
begin
chef_node = ::Chef::Node.load(@server.mu_name)
@@ -722,14 +761,16 @@
nodes_to_delete.each { |name|
@server.deploy.deployment['servers'][node_class].delete(name)
}
end
- if chef_node.normal['deployment'] != @server.deploy.deployment
+ if !@server.deploy.deployment.nil? and
+ (chef_node.normal['deployment'].nil? or
+ (chef_node.normal['deployment'].to_h <=> @server.deploy.deployment) != 0
+ )
MU.log "Updating node: #{@server.mu_name} deployment attributes", details: @server.deploy.deployment
chef_node.normal['deployment'].merge!(@server.deploy.deployment)
- chef_node.normal['deployment']['ssh_public_key'] = @server.deploy.ssh_public_key
chef_node.save
end
return chef_node['deployment']
rescue Net::HTTPServerException => e
MU.log "Attempted to save deployment to Chef node #{@server.mu_name} before it was bootstrapped.", MU::DEBUG
@@ -768,19 +809,28 @@
begin
knife_cd.run
rescue Net::HTTPServerException
end
end
+ MU.log "knife data bag delete #{node}"
+ if !noop
+ knife_cd = ::Chef::Knife::ClientDelete.new(['data', 'bag', 'delete', node])
+ knife_cd.config[:yes] = true
+ begin
+ knife_cd.run
+ rescue Net::HTTPServerException
+ end
+ end
return if nodeonly
begin
deleteSecret(vault: node) if !noop
rescue MuNoSuchSecret
end
["crt", "key", "csr"].each { |ext|
- if File.exists?("#{MU.mySSLDir}/#{node}.#{ext}")
+ if File.exist?("#{MU.mySSLDir}/#{node}.#{ext}")
MU.log "Removing #{MU.mySSLDir}/#{node}.#{ext}"
File.unlink("#{MU.mySSLDir}/#{node}.#{ext}") if !noop
end
}
end
@@ -810,19 +860,21 @@
MU.log "Saving #{@server.mu_name} Chef artifacts"
begin
chef_node = ::Chef::Node.load(@server.mu_name)
rescue Net::HTTPServerException
+ @server.deploy.sendAdminSlack("Couldn't load Chef metadata on `#{@server.mu_name}` :crying_cat_face:")
raise MU::Groomer::RunError, "Couldn't load Chef node #{@server.mu_name}"
end
# Figure out what this node thinks its name is
system_name = chef_node['fqdn'] if !chef_node['fqdn'].nil?
MU.log "#{@server.mu_name} local name is #{system_name}", MU::DEBUG
chef_node.normal.app = @config['application_cookbook'] if !@config['application_cookbook'].nil?
chef_node.normal["service_name"] = @config["name"]
+ chef_node.normal["credentials"] = @config["credentials"]
chef_node.normal["windows_admin_username"] = @config['windows_admin_username']
chef_node.chef_environment = MU.environment.downcase
if @server.config['cloud'] == "AWS"
chef_node.normal["ec2"] = MU.structToHash(@server.cloud_desc)
end
@@ -963,40 +1015,40 @@
self.class.loadChefLib
return if rl_entry.nil? and multiple.size == 0
if multiple.size == 0
multiple = [rl_entry]
end
- multiple.each { |rl_entry|
- if !rl_entry.match(/^role|recipe\[/)
- rl_entry = "#{type}[#{rl_entry}]"
+ multiple.each { |entry|
+ if !entry.match(/^role|recipe\[/)
+ entry = "#{type}[#{entry}]"
end
}
if !ignore_missing
role_list = nil
recipe_list = nil
missing = false
- multiple.each { |rl_entry|
- # Rather than argue about whether to expect a bare rl_entry name or
- # require rl_entry[rolename], let's just accomodate.
- if rl_entry.match(/^role\[(.+?)\]/)
- rl_entry_name = Regexp.last_match(1)
+ multiple.each { |entry|
+ # Rather than argue about whether to expect a bare entry name or
+ # require entry[rolename], let's just accommodate.
+ if entry.match(/^role\[(.+?)\]/)
+ entry_name = Regexp.last_match(1)
if role_list.nil?
query=%Q{#{MU::Groomer::Chef.knife} role list};
role_list = %x{#{query}}
end
- if !role_list.match(/(^|\n)#{rl_entry_name}($|\n)/)
- MU.log "Attempting to add non-existent #{rl_entry} to #{@server.mu_name}", MU::WARN
+ if !role_list.match(/(^|\n)#{entry_name}($|\n)/)
+ MU.log "Attempting to add non-existent #{entry} to #{@server.mu_name}", MU::WARN
missing = true
end
- elsif rl_entry.match(/^recipe\[(.+?)\]/)
- rl_entry_name = Regexp.last_match(1)
+ elsif entry.match(/^recipe\[(.+?)\]/)
+ entry_name = Regexp.last_match(1)
if recipe_list.nil?
query=%Q{#{MU::Groomer::Chef.knife} recipe list};
recipe_list = %x{#{query}}
end
- if !recipe_list.match(/(^|\n)#{rl_entry_name}($|\n)/)
- MU.log "Attempting to add non-existent #{rl_entry} to #{@server.mu_name}", MU::WARN
+ if !recipe_list.match(/(^|\n)#{entry_name}($|\n)/)
+ MU.log "Attempting to add non-existent #{entry} to #{@server.mu_name}", MU::WARN
missing = true
end
end
if missing and !ignore_missing