lib/osc/machete/torque_helper.rb in osc-machete-1.1.4 vs lib/osc/machete/torque_helper.rb in osc-machete-1.2.0
- old
+ new
@@ -5,22 +5,36 @@
# "implements an idiomatic Ruby interface for common UNIX shell commands"
# also helps to have these separate so we can use a mock shell for unit tests
#
# == FIXME: This contains no state whatsoever. It should probably be changed into a module.
class OSC::Machete::TorqueHelper
+ # FIXME: Use ood_cluster gem
# LIB and BIN are read from the TORQUE_LIB / TORQUE_BIN environment variables when set,
# otherwise they fall back to the /opt/torque paths below.
+ LIB = ENV['TORQUE_LIB'] || '/opt/torque/lib64'
+ BIN = ENV['TORQUE_BIN'] || '/opt/torque/bin'
# HOSTS maps a cluster key (String) to the batch server host name handed to PBS::Batch.
# The fallback entry is stored under the Symbol :default, not a String key.
+ HOSTS = {
+ 'oakley' => 'oak-batch.osc.edu',
+ 'ruby' => 'ruby-batch.osc.edu',
+ 'quick' => 'quick-batch.osc.edu',
+ 'owens' => 'owens-batch.ten.osc.edu',
+ :default => 'oak-batch.osc.edu'
+ }
- # Alias to initialize a new object.
# Old version: every call to default built a fresh instance.
# New version: the instance is memoized in a class-level instance variable (@default) and
# can be replaced through the default= writer generated by attr_writer.
- def self.default
- self::new()
+ class << self
+ #@!attribute default
+ # @return [TorqueHelper] default TorqueHelper instance to use
+ attr_writer :default
+ def default
+ @default ||= self::new()
+ end
end
# Returns an OSC::Machete::Status ValueObject for a char
#
# @param [String] char The Torque status char
#
- # @example Completed
- # status_for_char("C") #=> OSC::Machete::Status.completed
+ # @example Passed
+ # status_for_char("C") #=> OSC::Machete::Status.passed
# @example Queued
# status_for_char("W") #=> OSC::Machete::Status.queued
#
# @return [OSC::Machete::Status] The status corresponding to the char
def status_for_char(char)
@@ -49,16 +63,10 @@
# Where depends_on is a hash whose keys are dependency types and whose values are arrays
# of arguments. See the documentation on dependency_list in the qsub man pages for details.
#
# Bills against the project specified by the primary group of the user.
def qsub(script, host: nil, depends_on: {}, account_string: nil)
- # if the script is set to run on Oakley in PBS headers
- # this is to obviate current torque filter defect in which
- # a script with PBS header set to specify oak-batch ends
- # isn't properly handled and the job gets limited to 4GB
- pbs_job = get_pbs_job( host.nil? ? get_pbs_conn(script: script) : get_pbs_conn(host: host) )
-
headers = { depend: qsub_dependencies_header(depends_on) }
headers.clear if headers[:depend].empty?
# currently we set the billable project to the name of the primary group
# this will probably be both SUPERCOMPUTER CENTER SPECIFIC and must change
@@ -68,11 +76,11 @@
headers[PBS::ATTR[:A]] = account_string
elsif account_string_valid_project?(default_account_string)
headers[PBS::ATTR[:A]] = default_account_string
end
- pbs_job.submit(file: script, headers: headers, qsub: true).id
+ pbs(host: host, script: script).submit_script(script, headers: headers, qsub: true)
end
# convert dependencies hash to a PBS header string
def qsub_dependencies_header(depends_on = {})
depends_on.map { |x|
@@ -101,90 +109,71 @@
#
# @param [String] pbsid The pbsid of the job to inspect.
#
# @return [Status] The job state
def qstat(pbsid, host: nil)
# Looks up the state of a single job and converts its status char via status_for_char.
# When host is nil, the new version lets pbs() derive the batch host from the job id.
-
- # Create a PBS::Job object based on the pbsid or the optional host param
- pbs_conn = host.nil? ? get_pbs_conn(pbsid: pbsid.to_s) : get_pbs_conn(host: host)
- pbs_job = get_pbs_job(pbs_conn, pbsid)
-
- job_status = pbs_job.status
- # Get the status char value from the job.
- status_for_char job_status[:attribs][:job_state][0]
-
- rescue PBS::UnkjobidError => err
# The id is normalised to a String once; it is used for host detection, for the query,
# and as the key into the query result.
# NOTE(review): presumably get_job returns a hash keyed by job id - confirm against PBS::Batch.
+ id = pbsid.to_s
+ status = pbs(host: host, id: id).get_job(id, filters: [:job_state])
+ status_for_char status[id][:job_state][0] # get status from status char value
+ rescue PBS::UnkjobidError
# An unknown job id is not re-raised; the method returns Status.passed instead.
OSC::Machete::Status.passed
end
# Perform a qdel command on a single job.
#
# @param [String] pbsid The pbsid of the job to be deleted.
#
# @return [nil]
def qdel(pbsid, host: nil)
# Deletes a single job. When host is nil, the new version lets pbs() derive the batch
# host from the job id.
-
- pbs_conn = host.nil? ? get_pbs_conn(pbsid: pbsid.to_s) : get_pbs_conn(host: host)
- pbs_job = get_pbs_job(pbs_conn, pbsid.to_s)
-
- pbs_job.delete
-
- rescue PBS::UnkjobidError => err
# The id is normalised to a String once and used for both host detection and deletion.
+ id = pbsid.to_s
+ pbs(host: host, id: id).delete_job(id)
+ rescue PBS::UnkjobidError
# The rescue body is intentionally empty: PBS::UnkjobidError is swallowed here.
# Common use case where trying to delete a job that is no longer in the system.
end
- private
+ def pbs(host: nil, id: nil, script: nil)
# Builds the PBS::Batch object used by qsub/qstat/qdel in the new version; it replaces the
# removed get_pbs_job / get_pbs_conn helpers shown below. It is defined above the
# `private` marker in the new version, whereas the removed helpers sat below it.
#
# host   - a HOSTS cluster key or any other value; non-key values are passed through
#          unchanged after to_s
# id     - job id, used to guess the cluster only when host is not given
# script - script path, used to guess the cluster only when host is not given and the id
#          produced no key
#
# Returns a PBS::Batch constructed with the resolved host plus LIB and BIN.
+ if host
+ # actually check if host is "oakley" i.e. a cluster key
+ host = HOSTS.fetch(host.to_s, host.to_s)
+ else
+ # try to determine host
+ key = host_from_pbsid(id) if id
+ key = host_from_script_pbs_header(script) if script && key.nil?
- # Factory to return a PBS::Job object
- def get_pbs_job(conn, pbsid=nil)
- pbsid.nil? ? PBS::Job.new(conn: conn) : PBS::Job.new(conn: conn, id: pbsid.to_s)
# key is nil when neither id nor script yields a cluster; nil is not a HOSTS key, so the
# :default entry is used.
+ host = HOSTS.fetch(key, HOSTS.fetch(:default))
end
- # Returns a PBS connection object
- #
- # @option [:script] A PBS script with headers as string
- # @option [:pbsid] A valid pbsid as string
- #
- # @return [PBS::Conn] A connection option for the PBS host (Default: Oakley)
- def get_pbs_conn(options={})
- if options[:script]
- PBS::Conn.batch(host_from_script_pbs_header(options[:script]))
- elsif options[:pbsid]
- PBS::Conn.batch(host_from_pbsid(options[:pbsid]))
- elsif options[:host]
- PBS::Conn.batch(options[:host])
- else
- PBS::Conn.batch("oakley")
- end
- end
+ PBS::Batch.new(
+ host: host,
+ lib: LIB,
+ bin: BIN
+ )
+ end
+ private
# return the name of the host to use based on the pbs header
# TODO: Think of a more efficient way to do this.
def host_from_script_pbs_header(script)
# Maps a "#PBS -q @<name>-batch" directive found anywhere in the script file to a cluster key.
# The file is opened and read in full again for every branch that gets evaluated.
# New version: the "glenn" branch and the "oakley" fallback are removed, so the method
# returns nil when no pattern matches, and "owens" is recognised.
if (File.open(script) { |f| f.read =~ /#PBS -q @oak-batch/ })
"oakley"
- elsif (File.open(script) { |f| f.read =~ /#PBS -q @opt-batch/ })
- "glenn"
elsif (File.open(script) { |f| f.read =~ /#PBS -q @ruby-batch/ })
"ruby"
elsif (File.open(script) { |f| f.read =~ /#PBS -q @quick-batch/ })
"quick"
- else
- "oakley" # DEFAULT
+ elsif (File.open(script) { |f| f.read =~ /#PBS -q @owens-batch/ })
+ "owens"
end
end
# Return the PBS host string based on a full pbsid string
def host_from_pbsid(pbsid)
# Maps a job id to a cluster key by pattern: an id consisting only of digits maps to
# "ruby"; the other branches look for a cluster-specific substring.
# New version: the "glenn" branch and the "oakley" fallback are removed, so the method
# returns nil when nothing matches, and "owens" is recognised.
# NOTE(review): only the digits-only check calls to_s; the other branches apply =~ to
# pbsid as given, so they assume it is already a String.
if (pbsid =~ /oak-batch/ )
"oakley"
- elsif (pbsid =~ /opt-batch/ )
- "glenn"
elsif (pbsid.to_s =~ /^\d+$/ )
"ruby"
elsif (pbsid =~ /quick/ )
"quick"
- else
- "oakley" # DEFAULT
+ elsif (pbsid =~ /owens/ )
+ "owens"
end
end
end