lib/osc/machete/torque_helper.rb in osc-machete-1.1.4 vs lib/osc/machete/torque_helper.rb in osc-machete-1.2.0

- old
+ new

@@ -5,22 +5,36 @@
  # "implements an idiomatic Ruby interface for common UNIX shell commands"
  # also helps to have these separate so we can use a mock shell for unit tests
  #
  # == FIXME: This contains no state whatsoever. It should probably be changed into a module.
  class OSC::Machete::TorqueHelper
+   # FIXME: Use ood_cluster gem
+   LIB = ENV['TORQUE_LIB'] || '/opt/torque/lib64'
+   BIN = ENV['TORQUE_BIN'] || '/opt/torque/bin'
+   HOSTS = {
+     'oakley' => 'oak-batch.osc.edu',
+     'ruby' => 'ruby-batch.osc.edu',
+     'quick' => 'quick-batch.osc.edu',
+     'owens' => 'owens-batch.ten.osc.edu',
+     :default => 'oak-batch.osc.edu'
+   }
-   # Alias to initialize a new object.
-   def self.default
-     self::new()
+   class << self
+     #@!attribute default
+     # @return [TorqueHelper] default TorqueHelper instance to use
+     attr_writer :default
+     def default
+       @default ||= self::new()
+     end
    end
    # Returns an OSC::Machete::Status ValueObject for a char
    #
    # @param [String] char The Torque status char
    #
-   # @example Completed
-   # status_for_char("C") #=> OSC::Machete::Status.completed
+   # @example Passed
+   # status_for_char("C") #=> OSC::Machete::Status.passed
    # @example Queued
    # status_for_char("W") #=> OSC::Machete::Status.queued
    #
    # @return [OSC::Machete::Status] The status corresponding to the char
    def status_for_char(char)
@@ -49,16 +63,10 @@
    # Where depends_on is a hash with key being dependency type and array containing the
    # arguments. See documentation on dependency_list in qsub man pages for details.
    #
    # Bills against the project specified by the primary group of the user.
    def qsub(script, host: nil, depends_on: {}, account_string: nil)
-     # if the script is set to run on Oakley in PBS headers
-     # this is to obviate current torque filter defect in which
-     # a script with PBS header set to specify oak-batch ends
-     # isn't properly handled and the job gets limited to 4GB
-     pbs_job = get_pbs_job( host.nil? ? get_pbs_conn(script: script) : get_pbs_conn(host: host) )
-
      headers = { depend: qsub_dependencies_header(depends_on) }
      headers.clear if headers[:depend].empty?
      # currently we set the billable project to the name of the primary group
      # this will probably be both SUPERCOMPUTER CENTER SPECIFIC and must change
@@ -68,11 +76,11 @@
        headers[PBS::ATTR[:A]] = account_string
      elsif account_string_valid_project?(default_account_string)
        headers[PBS::ATTR[:A]] = default_account_string
      end
-     pbs_job.submit(file: script, headers: headers, qsub: true).id
+     pbs(host: host, script: script).submit_script(script, headers: headers, qsub: true)
    end
    # convert dependencies hash to a PBS header string
    def qsub_dependencies_header(depends_on = {})
      depends_on.map { |x|
@@ -101,90 +109,71 @@
    #
    # @param [String] pbsid The pbsid of the job to inspect.
    #
    # @return [Status] The job state
    def qstat(pbsid, host: nil)
-
-     # Create a PBS::Job object based on the pbsid or the optional host param
-     pbs_conn = host.nil? ? get_pbs_conn(pbsid: pbsid.to_s) : get_pbs_conn(host: host)
-     pbs_job = get_pbs_job(pbs_conn, pbsid)
-
-     job_status = pbs_job.status
-     # Get the status char value from the job.
-     status_for_char job_status[:attribs][:job_state][0]
-
-   rescue PBS::UnkjobidError => err
+     id = pbsid.to_s
+     status = pbs(host: host, id: id).get_job(id, filters: [:job_state])
+     status_for_char status[id][:job_state][0] # get status from status char value
+   rescue PBS::UnkjobidError
      OSC::Machete::Status.passed
    end
    # Perform a qdel command on a single job.
    #
    # @param [String] pbsid The pbsid of the job to be deleted.
    #
    # @return [nil]
    def qdel(pbsid, host: nil)
-
-     pbs_conn = host.nil? ? get_pbs_conn(pbsid: pbsid.to_s) : get_pbs_conn(host: host)
-     pbs_job = get_pbs_job(pbs_conn, pbsid.to_s)
-
-     pbs_job.delete
-
-   rescue PBS::UnkjobidError => err
+     id = pbsid.to_s
+     pbs(host: host, id: id).delete_job(id)
+   rescue PBS::UnkjobidError
      # Common use case where trying to delete a job that is no longer in the system.
    end
-   private
+   def pbs(host: nil, id: nil, script: nil)
+     if host
+       # actually check if host is "oakley" i.e. a cluster key
+       host = HOSTS.fetch(host.to_s, host.to_s)
+     else
+       # try to determine host
+       key = host_from_pbsid(id) if id
+       key = host_from_script_pbs_header(script) if script && key.nil?
-   # Factory to return a PBS::Job object
-   def get_pbs_job(conn, pbsid=nil)
-     pbsid.nil? ? PBS::Job.new(conn: conn) : PBS::Job.new(conn: conn, id: pbsid.to_s)
+       host = HOSTS.fetch(key, HOSTS.fetch(:default))
      end
-   # Returns a PBS connection object
-   #
-   # @option [:script] A PBS script with headers as string
-   # @option [:pbsid] A valid pbsid as string
-   #
-   # @return [PBS::Conn] A connection option for the PBS host (Default: Oakley)
-   def get_pbs_conn(options={})
-     if options[:script]
-       PBS::Conn.batch(host_from_script_pbs_header(options[:script]))
-     elsif options[:pbsid]
-       PBS::Conn.batch(host_from_pbsid(options[:pbsid]))
-     elsif options[:host]
-       PBS::Conn.batch(options[:host])
-     else
-       PBS::Conn.batch("oakley")
-     end
-   end
+     PBS::Batch.new(
+       host: host,
+       lib: LIB,
+       bin: BIN
+     )
+   end
+   private
    # return the name of the host to use based on the pbs header
    # TODO: Think of a more efficient way to do this.
    def host_from_script_pbs_header(script)
      if (File.open(script) { |f| f.read =~ /#PBS -q @oak-batch/ })
        "oakley"
-     elsif (File.open(script) { |f| f.read =~ /#PBS -q @opt-batch/ })
-       "glenn"
      elsif (File.open(script) { |f| f.read =~ /#PBS -q @ruby-batch/ })
        "ruby"
      elsif (File.open(script) { |f| f.read =~ /#PBS -q @quick-batch/ })
        "quick"
-     else
-       "oakley" # DEFAULT
+     elsif (File.open(script) { |f| f.read =~ /#PBS -q @owens-batch/ })
+       "owens"
      end
    end
    # Return the PBS host string based on a full pbsid string
    def host_from_pbsid(pbsid)
      if (pbsid =~ /oak-batch/ )
        "oakley"
-     elsif (pbsid =~ /opt-batch/ )
-       "glenn"
      elsif (pbsid.to_s =~ /^\d+$/ )
        "ruby"
      elsif (pbsid =~ /quick/ )
        "quick"
-     else
-       "oakley" # DEFAULT
+     elsif (pbsid =~ /owens/ )
+       "owens"
      end
    end
  end