module TreasureData
module Command

  # TODO
  # Number of network errors wait_job tolerates (over one whole wait)
  # before it re-raises the error to the caller.
  JOB_WAIT_MAX_RETRY_COUNT_ON_NETWORK_ERROR = 10

  # Numeric job priority => label shown to the user.
  PRIORITY_FORMAT_MAP = {
    -2 => 'VERY LOW',
    -1 => 'LOW',
    0 => 'NORMAL',
    1 => 'HIGH',
    2 => 'VERY HIGH',
  }

  # Patterns accepted as a priority (label or number) => numeric priority.
  PRIORITY_PARSE_MAP = {
    /\Avery[ _\-]?low\z/i => -2,
    /\A-2\z/ => -2,
    /\Alow\z/i => -1,
    /\A-1\z/ => -1,
    /\Anorm(?:al)?\z/i => 0,
    /\A[\-\+]?0\z/ => 0,
    /\Ahigh\z/i => 1,
    /\A[\+]?1\z/ => 1,
    /\Avery[ _\-]?high\z/i => 2,
    /\A[\+]?2\z/ => 2,
  }

  # Job types for which job_show prints the query details and the result.
  QUERY_JOB_TYPES = [:hive, :pig, :impala, :presto].freeze

  # Lists jobs as a table.
  #
  # The optional positional argument is the maximum number of jobs to show
  # (20 when omitted). -p/-s shift the window, -R/-S/-E filter by status and
  # --slow keeps only jobs slower than the given number of seconds.
  def job_list(op)
    page = 0
    skip = 0
    status = nil
    slower_than = nil

    op.on('-p', '--page PAGE', 'skip N pages', Integer) {|i|
      page = i
    }
    op.on('-s', '--skip N', 'skip N jobs', Integer) {|i|
      skip = i
    }
    op.on('-R', '--running', 'show only running jobs', TrueClass) {|b|
      status = 'running'
    }
    op.on('-S', '--success', 'show only succeeded jobs', TrueClass) {|b|
      status = 'success'
    }
    op.on('-E', '--error', 'show only failed jobs', TrueClass) {|b|
      status = 'error'
    }
    op.on('--slow [SECONDS]', 'show slow queries (default threshold: 3600 seconds)', Integer) {|i|
      slower_than = i || 3600
    }
    set_render_format_option(op)

    max = (op.cmd_parse || 20).to_i

    client = get_client

    # 'page' is always an Integer here (0 by default), so no nil check is needed
    skip += max * page

    conditions = slower_than ? {:slower_than => slower_than} : nil

    jobs = client.jobs(skip, skip + max - 1, status, conditions)

    rows = []
    jobs.each {|job|
      start = job.start_at
      elapsed = Command.humanize_elapsed_time(start, job.end_at)
      cpu_time = Command.humanize_time(job.cpu_time, true)
      priority = job_priority_name_of(job.priority)
      result_size = job.result_size ? Command.humanize_bytesize(job.result_size, 2) : ""
      rows << {
        :JobID => job.job_id,
        :Database => job.db_name,
        :Status => job.status,
        :Type => job.type,
        # only the first 50 characters of the query are listed
        :Query => job.query.to_s[0,50] + " ...",
        :Start => (start ? start.localtime : ''),
        :Elapsed => elapsed.rjust(11),
        :CPUTime => cpu_time.rjust(17),
        :ResultSize => result_size,
        :Priority => priority,
        :Result => job.result_url
      }
    }

    puts cmd_render_table(rows,
      :fields => [:JobID, :Status, :Start, :Elapsed, :CPUTime, :ResultSize, :Priority, :Result, :Type, :Database, :Query],
      :max_width => 1000,
      :render_format => op.render_format
    )
  end

  # Shows the details of one job and, for query jobs, its result.
  #
  # The positional argument is the job id. With -w the command waits for the
  # job to finish before showing the result; with -o the result is written to
  # a file instead of the terminal.
  #
  # Raises ParameterConfigurationError when the given options cannot be
  # combined (see the validation below).
  def job_show(op)
    verbose = nil
    wait = false
    output = nil
    format = nil
    render_opts = {:header => false}
    limit = nil
    exclude = false

    op.on('-v', '--verbose', 'show logs', TrueClass) {|b|
      verbose = b
    }
    op.on('-w', '--wait', 'wait for finishing the job', TrueClass) {|b|
      wait = b
    }
    op.on('-G', '--vertical', 'use vertical table to show results', TrueClass) {|b|
      render_opts[:vertical] = b
    }
    op.on('-o', '--output PATH', 'write result to the file') {|s|
      unless Dir.exist?(File.dirname(s))
        s = File.expand_path(s)
      end
      output = s
      # writing to a file defaults to tsv unless -f was already given
      format = 'tsv' if format.nil?
    }
    op.on('-f', '--format FORMAT', 'format of the result to write to the file (tsv, csv, json, msgpack, and msgpack.gz)') {|s|
      unless ['tsv', 'csv', 'json', 'msgpack', 'msgpack.gz'].include?(s)
        raise "Unknown format #{s.dump}. Supported formats are: tsv, csv, json, msgpack, and msgpack.gz"
      end
      format = s
    }
    op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
      unless s.to_i > 0
        raise "Invalid limit number. Must be a positive integer"
      end
      limit = s.to_i
    }
    op.on('-c', '--column-header', 'output of the columns\' header when the schema is available',
                                   ' for the table (only applies to tsv and csv formats)', TrueClass) {|b|
      render_opts[:header] = b;
    }
    op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
      exclude = b
    }

    job_id = op.cmd_parse

    # validation of option combinations
    if output.nil? && format
      unless ['tsv', 'csv', 'json'].include?(format)
        raise ParameterConfigurationError,
              "Supported formats are only tsv, csv and json without -o / --output option"
      end
    end

    if render_opts[:header]
      unless ['tsv', 'csv'].include?(format)
        raise ParameterConfigurationError,
              "Option -c / --column-header is only supported with tsv and csv formats"
      end
    end

    if !output.nil? && !limit.nil?
      raise ParameterConfigurationError,
            "Option -l / --limit is only valid when not outputting to file (no -o / --output option provided)"
    end

    client = get_client

    job = client.job(job_id)

    puts "JobID : #{job.job_id}"
    #puts "URL : #{job.url}"
    puts "Status : #{job.status}"
    puts "Type : #{job.type}"
    puts "Database : #{job.db_name}"
    # exclude some fields from bulk_import_perform type jobs
    if QUERY_JOB_TYPES.include?(job.type)
      puts "Priority : #{job_priority_name_of(job.priority)}"
      puts "Retry limit : #{job.retry_limit}"
      puts "Output : #{job.result_url}"
      puts "Query : #{job.query}"
    elsif job.type == :bulk_import_perform
      puts "Destination : #{job.query}"
    end
    # if the job is done and is of type hive, show the Map-Reduce cumulated CPU time
    if job.finished?
      if [:hive].include?(job.type)
        puts "CPU time : #{Command.humanize_time(job.cpu_time, true)}"
      end
      if QUERY_JOB_TYPES.include?(job.type)
        puts "Result size : #{Command.humanize_bytesize(job.result_size, 2)}"
      end
    end

    if wait && !job.finished?
      # wait_job prints the job's output while it waits
      wait_job(job)
      if QUERY_JOB_TYPES.include?(job.type) && !exclude
        show_result_with_retry(job, output, limit, format, render_opts)
      end

    else
      if QUERY_JOB_TYPES.include?(job.type) && !exclude && job.finished?
        show_result_with_retry(job, output, limit, format, render_opts)
      end

      if verbose
        if !job.debug['cmdout'].nil?
          puts ""
          puts "Output:"
          job.debug['cmdout'].to_s.split("\n").each {|line|
            puts " " + line
          }
        end
        if !job.debug['stderr'].nil?
          puts ""
          puts "Details:"
          job.debug['stderr'].to_s.split("\n").each {|line|
            puts " " + line
          }
        end
      end
    end

    puts "\rUse '-v' option to show detailed messages." + " " * 20 unless verbose
  end

  # Prints the status of the job whose id is the positional argument.
  def job_status(op)
    job_id = op.cmd_parse
    client = get_client

    puts client.job_status(job_id)
  end

  # Requests the termination of the job whose id is the positional argument
  # and reports the outcome on stderr, based on the status client.kill
  # returns. Exits with status 0 if that status is a finished one.
  def job_kill(op)
    job_id = op.cmd_parse

    client = get_client

    former_status = client.kill(job_id)
    if TreasureData::Job::FINISHED_STATUS.include?(former_status)
      $stderr.puts "Job #{job_id} is already finished (#{former_status})"
      exit 0
    end

    if former_status == TreasureData::Job::STATUS_RUNNING
      $stderr.puts "Job #{job_id} is killed."
    else
      $stderr.puts "Job #{job_id} is canceled."
    end
  end

  private

  # Prints the "Result :" heading and shows the job result through
  # show_result, retrying on transient failures.
  #
  # A TreasureData::NotFoundError is silently ignored. Network errors and
  # TreasureData::APIError whose message starts with a 5xx status are retried
  # with an exponential (base 2) back-off that starts at 5 seconds; once the
  # cumulated delay exceeds 200 seconds the error is re-raised. Any other
  # TreasureData::APIError is re-raised immediately.
  def show_result_with_retry(job, output, limit, format, render_opts)
    retry_delay = 5
    max_cumul_retry_delay = 200
    cumul_retry_delay = 0

    puts "Result :"
    begin
      show_result(job, output, limit, format, render_opts)
    rescue TreasureData::NotFoundError => e
      # Got 404 because result not found.
    rescue TreasureData::APIError, # HTTP status code 500 or more
           Errno::ECONNREFUSED, Errno::ECONNRESET, Timeout::Error, EOFError,
           OpenSSL::SSL::SSLError, SocketError => e
      # don't retry on 300 and 400 errors
      if e.class == TreasureData::APIError && e.message !~ /^5\d\d:\s+/
        raise e
      end
      if cumul_retry_delay > max_cumul_retry_delay
        raise e
      end
      $stderr.puts "Error #{e.class}: #{e.message}. Retrying after #{retry_delay} seconds..."
      sleep retry_delay
      cumul_retry_delay += retry_delay
      retry_delay *= 2
      retry
    end
  end

  # Polls the job every 2 seconds until it is finished, printing the lines of
  # the job's 'cmdout' and 'stderr' debug output that were not printed yet.
  #
  # With first_call set, the status is polled at least once even if the job
  # already reports itself as finished. Network errors while polling are
  # retried up to JOB_WAIT_MAX_RETRY_COUNT_ON_NETWORK_ERROR times in total,
  # then re-raised.
  def wait_job(job, first_call = false)
    $stderr.puts "queued..."

    cmdout_lines = 0
    stderr_lines = 0
    max_error_counts = JOB_WAIT_MAX_RETRY_COUNT_ON_NETWORK_ERROR

    while first_call || !job.finished?
      first_call = false
      begin
        sleep 2
        job.update_status!
      rescue Timeout::Error, SystemCallError, EOFError, SocketError
        if max_error_counts <= 0
          raise
        end
        max_error_counts -= 1
        retry
      end

      # only the lines past the ones already printed
      cmdout = job.debug['cmdout'].to_s.split("\n")[cmdout_lines..-1] || []
      stderr = job.debug['stderr'].to_s.split("\n")[stderr_lines..-1] || []
      (cmdout + stderr).each {|line|
        puts " "+line
      }
      cmdout_lines += cmdout.size
      stderr_lines += stderr.size
    end
  end

  # Writes the job result to the 'output' file when one is given, otherwise
  # renders it on stdout.
  def show_result(job, output, limit, format, render_opts={})
    if output
      write_result(job, output, limit, format, render_opts)
      puts "\rwritten to #{output} in #{format} format" + " " * 50
    else
      # every format that is allowed on stdout
      render_result(job, limit, format, render_opts)
    end
  end

  # Writes the job result in the given format to the 'output' file, or to
  # stdout when 'output' is nil.
  #
  # 'limit' caps the number of rows only when writing to stdout. A progress
  # indicator is shown only when writing to a file. render_opts[:header]
  # requests a header row (csv and tsv only, and only if the job exposes a
  # result schema).
  #
  # Raises ParameterConfigurationError for msgpack / msgpack.gz without an
  # output file and RuntimeError for an unknown format.
  def write_result(job, output, limit, format, render_opts={})
    # the next 3 formats allow writing to both a file and stdout
    case format
    when 'json'
      require 'yajl'
      open_file(output, "w") {|f|
        f.write "["
        n_rows = 0
        unless output.nil?
          indicator = Command::SizeBasedDownloadProgressIndicator.new(
            "NOTE: the job result is being written to #{output} in json format",
            job.result_size, 0.1, 1)
        end
        job.result_each_with_compr_size {|row, compr_size|
          indicator.update(compr_size) unless output.nil?
          f.write ",\n" if n_rows > 0
          f.write Yajl.dump(row)
          n_rows += 1
          break if output.nil? && !limit.nil? && n_rows == limit
        }
        f.write "]"
        indicator.finish unless output.nil?
      }
      puts if output.nil?

    when 'csv'
      require 'yajl'
      require 'csv'

      open_file(output, "w") {|f|
        writer = CSV.new(f)
        # output headers
        if render_opts[:header] && job.hive_result_schema
          writer << job.hive_result_schema.map {|name, type|
            name
          }
        end
        # output data
        n_rows = 0
        unless output.nil?
          indicator = Command::SizeBasedDownloadProgressIndicator.new(
            "NOTE: the job result is being written to #{output} in csv format",
            job.result_size, 0.1, 1)
        end
        job.result_each_with_compr_size {|row, compr_size|
          indicator.update(compr_size) unless output.nil?
          # TODO limit the # of columns
          writer << row.map {|col|
            dump_column(col)
          }
          n_rows += 1
          writer.flush if n_rows % 100 == 0 # flush every 100 records
          break if output.nil? && !limit.nil? && n_rows == limit
        }
        indicator.finish unless output.nil?
      }

    when 'tsv'
      require 'yajl'

      open_file(output, "w") {|f|
        # output headers: names separated by tabs, no trailing tab, so the
        # header has the same number of fields as the data rows
        if render_opts[:header] && job.hive_result_schema
          f.write job.hive_result_schema.map {|name, type| name }.join("\t")
          f.write "\n"
        end
        # output data
        n_rows = 0
        unless output.nil?
          indicator = Command::SizeBasedDownloadProgressIndicator.new(
            "NOTE: the job result is being written to #{output} in tsv format",
            job.result_size, 0.1, 1)
        end
        job.result_each_with_compr_size {|row, compr_size|
          indicator.update(compr_size) unless output.nil?
          n_cols = 0
          row.each {|col|
            f.write "\t" if n_cols > 0
            # TODO limit the # of columns
            f.write dump_column(col)
            n_cols += 1
          }
          f.write "\n"
          n_rows += 1
          f.flush if n_rows % 100 == 0 # flush every 100 records
          break if output.nil? && !limit.nil? && n_rows == limit
        }
        indicator.finish unless output.nil?
      }

    # these last 2 formats are only valid if writing the result to file through the -o/--output option.

    when 'msgpack'
      if output.nil?
        raise ParameterConfigurationError,
              "Format 'msgpack' does not support writing to stdout"
      end
      open_file(output, "wb") {|f|
        indicator = Command::SizeBasedDownloadProgressIndicator.new(
          "NOTE: the job result is being written to #{output} in msgpack format",
          job.result_size, 0.1, 1)
        job.result_format('msgpack', f) {|compr_size|
          indicator.update(compr_size)
        }
        indicator.finish
      }

    when 'msgpack.gz'
      if output.nil?
        raise ParameterConfigurationError,
              "Format 'msgpack.gz' does not support writing to stdout"
      end
      open_file(output, "wb") {|f|
        indicator = Command::SizeBasedDownloadProgressIndicator.new(
          "NOTE: the job result is being written to #{output} in msgpack.gz format",
          job.result_size, 0.1, 1)
        job.result_format('msgpack.gz', f) {|compr_size|
          indicator.update(compr_size)
        }
        indicator.finish
      }

    else
      raise "Unknown format #{format.inspect}"
    end
  end

  # Yields STDOUT when 'output' is nil; otherwise opens the 'output' file in
  # the given mode, yields it and closes it when the block is done (also when
  # the block raises). Returns the value of the block.
  def open_file(output, mode)
    return yield(STDOUT) if output.nil?

    File.open(output, mode) {|f|
      yield f
    }
  end

  # Renders the job result on stdout: as a table when 'format' is nil,
  # otherwise in the given format through write_result.
  #
  # Note that in table mode :max_width (and :change_fields, when the job
  # exposes a result schema) are set on the render_opts hash that was passed
  # in.
  def render_result(job, limit, format=nil, render_opts={})
    require 'yajl'

    if format.nil?
      # display result in tabular format
      rows = []
      n_rows = 0

      indicator = Command::SizeBasedDownloadProgressIndicator.new(
        "WARNING: the job result is being downloaded...", job.result_size, 0.1, 1)
      job.result_each_with_compr_size {|row, compr_size|
        indicator.update(compr_size)
        rows << row.map {|v|
          dump_column(v)
        }
        n_rows += 1
        break if !limit.nil? && n_rows == limit
      }
      print " " * 100, "\r" # make sure the previous WARNING is cleared over

      render_opts[:max_width] = 10000
      if job.hive_result_schema
        render_opts[:change_fields] = job.hive_result_schema.map { |name,type| name }
      end

      print "\r" + " " * 50
      puts "\r" + cmd_render_table(rows, render_opts)
    else
      # display result in any of: json, csv, tsv.
      # msgpack and msgpack.gz are not supported for stdout output
      write_result(job, nil, limit, format, render_opts)
    end
  end

  # Converts a result value to the string that is displayed or written:
  # Strings are used as they are, anything else is serialized with Yajl.
  def dump_column(v)
    require 'yajl'

    s = v.is_a?(String) ? v.to_s : Yajl.dump(v)
    # Here does UTF-8 -> UTF-16LE -> UTF8 conversion:
    #   a) to make sure the string doesn't include invalid byte sequence
    #   b) to display multi-byte characters as it is
    #   c) encoding from UTF-8 to UTF-8 doesn't check/replace invalid chars
    #   d) UTF-16LE was slightly faster than UTF-16BE, UTF-32LE or UTF-32BE
    s = s.encode('UTF-16LE', 'UTF-8', :invalid=>:replace, :undef=>:replace).encode!('UTF-8') if s.respond_to?(:encode)
    s
  end

  # Returns the label of the numeric priority 'id', or 'NORMAL' when the id
  # is not in PRIORITY_FORMAT_MAP.
  def job_priority_name_of(id)
    PRIORITY_FORMAT_MAP[id] || 'NORMAL'
  end

  # Returns the numeric priority of the first PRIORITY_PARSE_MAP pattern that
  # matches 'name', or nil when none matches.
  def job_priority_id_of(name)
    _pattern, id = PRIORITY_PARSE_MAP.find {|pattern, _id|
      pattern.match(name)
    }
    id
  end

end # module Command
end # module TreasureData