lib/rvc/modules/diagnostics.rb in rvc-1.7.0 vs lib/rvc/modules/diagnostics.rb in rvc-1.8.0

- old
+ new

@@ -16,10 +16,12 @@
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
+require 'rvc/vim'
+
 DEFAULT_SERVER_PLACEHOLDER = '0.0.0.0'
 
 def wait_for_multiple_tasks tasks, timeout
   if tasks == []
     return []
@@ -46,35 +48,130 @@
     [task, result]
   end]
   results
 end
 
+# http://stackoverflow.com/questions/3386233/how-to-get-exit-status-with-rubys-netssh-library
+def ssh_exec!(ssh, command)
+  stdout_data = ""
+  stderr_data = ""
+  exit_code = nil
+  exit_signal = nil
+  ssh.open_channel do |channel|
+    channel.exec(command) do |ch, success|
+      unless success
+        abort "FAILED: couldn't execute command (ssh.channel.exec)"
+      end
+      channel.on_data do |ch,data|
+        stdout_data+=data
+      end
+      channel.on_extended_data do |ch,type,data|
+        stderr_data+=data
+      end
+
+      channel.on_request("exit-status") do |ch,data|
+        exit_code = data.read_long
+      end
+
+    end
+  end
+  ssh.loop
+  [stdout_data, stderr_data, exit_code]
+end
+
+opts :restart_services do
+  summary "Restart all services in hosts"
+  arg :cluster, nil, :lookup => VIM::ComputeResource, :multi => true
+  opt :host, "Host name (multi ok)", type: :string, short: 'n', :multi => true
+  opt :password, "Host password (multi ok)", type: :string, short: 'p', :multi => true
+end
+
+def restart_services clusters, opts
+  hosts = opts[:host]
+  pwds = opts[:password]
+  puts "Need to specify password(s) for fixing" if pwds == []
+
+  hosts.each do |host|
+    finished = false
+    pwds.each do |pwd|
+      break if finished
+      puts "\nTrying restart #{host} with pwd #{pwd}"
+      begin
+        Net::SSH.start(host, "root", :password => pwd, :paranoid => false) do |ssh|
+          # HZ 1258412 discusses the commands to fix a node with hostd problems
+          cmd = "/sbin/chkconfig usbarbitrator off"
+          puts "Running #{cmd}"
+          out = ssh_exec!(ssh,cmd)
+          if out[2] != 0
+            puts "Failed to execute #{cmd} on host #{host}"
+            puts out[1]
+          end
+
+          cmd = "/sbin/services.sh restart > /tmp/restart_services.log 2>&1"
+          puts "Running #{cmd}"
+          out = ssh_exec!(ssh,cmd)
+          if out[2] != 0
+            puts "Failed to restart all services on host #{host}"
+            puts out[1]
+          else
+            puts "Host #{host} restarted all services"
+            finished = true
+          end
+        end
+      rescue Net::SSH::AuthenticationFailed
+        puts "Failed to authenticate on host #{host}"
+      end
+    end
+  end
+end
+
 opts :vm_create do
   summary "Check that VMs can be created on all hosts in a cluster"
   arg :cluster, nil, :lookup => VIM::ComputeResource, :multi => true
   opt :datastore, "Datastore to put (temporary) VMs on", :lookup => VIM::Datastore
   opt :vm_folder, "VM Folder to place (temporary) VMs in", :lookup => VIM::Folder
   opt :timeout, "Time to wait for VM creation to finish", :type => :int, :default => 3 * 60
+  opt :fix, "Fix the failing ESX hosts", :type => :boolean , :default => false
+  opt :password, "Passwords for fixing hosts", :type => :string, short: 'p', :multi => true
 end
 
 def vm_create clusters, opts
   datastore = opts[:datastore]
   vm_folder = opts[:vm_folder]
   err "datastore is a required parameter" unless datastore
   err "vm_folder is a required parameter" unless vm_folder
 
   puts "Creating one VM per host ... (timeout = #{opts[:timeout]} sec)"
-  result = _vm_create clusters, datastore, vm_folder, opts
-
-  errors = result.select{|h, x| x['status'] != 'green'}
-  errors.each do |host, info|
-    puts "Failed to create VM on host #{host} (in cluster #{info['cluster']}): #{info['error']}"
+  errors = []
+  failed_hosts = []
+  begin
+    result = _vm_create clusters, datastore, vm_folder, opts
+    errors = result.select{|h, x| x['status'] != 'green'}
+    errors.each do |host, info|
+      puts "Failed to create VM on host #{host} (in cluster #{info['cluster']}): #{info['error']}"
+      err_msgs = ["Timed out", "InvalidState", "InvalidHostState", "InvalidHostConnectionState", "HostCommunication"]
+      err_msgs.each do |msg|
+        if info['error'].include? msg
+          failed_hosts << host
+          break
+        end
+      end
+    end
+  rescue Exception => e
+    puts "An error occurred:\n"
+    puts "e.message:", e.message
+    puts "e.backtrace:", e.backtrace.join("\n")
+    errors = [e]
   end
 
   if errors.length == 0
     puts "Success"
   end
+  if opts[:fix] && failed_hosts != []
+    opts[:host] = failed_hosts
+    restart_services(clusters, opts)
+  end
 end
 
 def _vm_create clusters, datastore, vm_folder, opts = {}
   pc = datastore._connection.serviceContent.propertyCollector
   datastore_path = "[#{datastore.name}]"
@@ -97,10 +194,11 @@
         :name => "VM-on-#{hosts_props[host]['name']}-#{run}",
         :guestId => 'otherGuest',
         :files => { :vmPathName => datastore_path },
         :numCPUs => 1,
         :memoryMB => 16,
+        :annotation => YAML.dump({'lease' => Time.now + 2 * opts[:timeout] + 60}),
         :deviceChange => [
           {
             :operation => :add,
             :device => VIM.VirtualCdrom(
               :key => -2,
@@ -116,15 +214,19 @@
               :unitNumber => 0
             )
           }
         ],
       }
-      task = vm_folder.CreateVM_Task(:config => config,
-                                     :pool => rp,
-                                     :host => host)
-      tasks_map[task] = host
-      hosts_infos[host][:create_task] = task
+      begin
+        task = vm_folder.CreateVM_Task(:config => config,
+                                       :pool => rp,
+                                       :host => host)
+        tasks_map[task] = host
+        hosts_infos[host][:create_task] = task
+      rescue
+        puts "Failed to create task for host #{host.name}"
+      end
     end
   end
 
   create_tasks = tasks_map.keys
   create_results = wait_for_multiple_tasks create_tasks, opts[:timeout]
@@ -146,24 +248,30 @@
     cluster = cluster_host_map[host]
     cluster_props = clusters_props[cluster]
 
     result = host_info[:create_result]
     result = host_info[:destroy_result] if result.is_a?(VIM::VirtualMachine)
+    error_detail = nil
     if result == nil
       error_str = nil
       status = 'green'
     elsif result.is_a?(String)
       error_str = result
       status = 'red'
     else
       error_str = "#{result.fault.class.wsdl_name}: #{result.localizedMessage}"
       status = 'red'
+      begin
+        error_detail = result.fault.faultMessage
+      rescue
+      end
     end
 
     out[host_props['name']] = {
       'cluster' => cluster_props['name'],
       'status' => status,
-      'error' => error_str
+      'error' => error_str,
+      'error_detail' => error_detail,
     }
   end
   out
 end