lib/genevalidator/output.rb in genevalidator-1.6.12 vs lib/genevalidator/output.rb in genevalidator-2.1.3
- old
+ new
@@ -1,17 +1,12 @@
-require 'erb'
-require 'fileutils'
require 'forwardable'
require 'json'
-require 'genevalidator/version'
-
module GeneValidator
class Output
extend Forwardable
- def_delegators GeneValidator, :opt, :config, :mutex, :mutex_html,
- :mutex_json
+ def_delegators GeneValidator, :opt, :config, :dirs, :mutex
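+ # opt, config and dirs are process-wide state owned by the GeneValidator
+ # module; mutex serialises console output across validation threads.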
attr_accessor :prediction_def
attr_accessor :nr_hits
# list of +ValidationReport+ objects
attr_accessor :validations
@@ -25,76 +20,55 @@
##
# Initializes the object
# Params:
# +current_idx+: index of the current query
# +no_of_hits+: number of BLAST hits for the query
# +definition+: definition line of the query sequence
def initialize(current_idx, no_of_hits, definition)
- @opt = opt
- @config = config
+ @opt = opt
+ @dirs = dirs
+ @config = config
@config[:run_no] += 1
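+ # Base path for all result files; e.g. (illustrative) an input named
+ # "queries.fa" would yield "<output_dir>/queries_results", assuming
+ # @dirs[:filename] holds the input file's basename.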
+ output_dir = @dirs[:output_dir]
+ @output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
@prediction_def = definition
@nr_hits = no_of_hits
@idx = current_idx
end
def print_output_console
+ return unless @opt[:output_formats].include? 'stdout'
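+ # fixed-width columns: query index, overall score, identifier, hit count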
+ c_fmt = "%3s\t%5s\t%20s\t%7s\t"
mutex.synchronize do
- print_console_header unless @config[:console_header_printed]
- short_def = @prediction_def.scan(/([^ ]+)/)[0][0]
- print format("%3s\t%5s\t%20s\t%7s\t", @idx, @overall_score, short_def,
- @nr_hits)
+ print_console_header(c_fmt)
+ short_def = @prediction_def.split(' ')[0]
+ print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
puts validations.map(&:print).join("\t").gsub('&nbsp;', ' ')
end
end
- def print_console_header
- @config[:console_header_printed] = true
- print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
- 'No_Hits')
- puts validations.map(&:short_header).join("\t")
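+ ##
+ # Writes this query's results to a standalone JSON file in json_dir
+ # (e.g. "input_3.json" for the third query of a file named "input") and
+ # records the row in the shared @config[:json_output] array.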
+ def generate_json
+ fname = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
+ row_data = { idx: @idx, overall_score: @overall_score,
+ definition: @prediction_def, no_hits: @nr_hits }
+ row = create_validation_hash(row_data)
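+ # @idx is 1-based; the shared json_output array is 0-based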
+ arr_idx = @idx - 1
+ @config[:json_output][arr_idx] = row
+ File.open(fname, 'w') { |f| f.write(row.to_json) }
end
- def generate_html
- mutex_html.synchronize do
- output_html = output_filename
- query_erb = File.join(@config[:aux], 'template_query.erb')
- template_file = File.open(query_erb, 'r').read
- erb = ERB.new(template_file, 0, '>')
- File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
- end
- end
+ private
- def output_filename
- idx = (@config[:run_no].to_f / @config[:output_max]).ceil
- output_html = File.join(@config[:html_path], "results#{idx}.html")
- write_html_header(output_html)
- output_html
+ def print_console_header(c_fmt)
+ return if @config[:console_header_printed]
+ @config[:console_header_printed] = true
+ warn '==> Validating input sequences'
+ warn '' # blank line
+ print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
+ puts validations.map(&:short_header).join("\t")
end
- def write_html_header(output_html)
- head_erb = File.join(@config[:aux], 'template_header.erb')
- set_up_html(head_erb, output_html) unless File.exist?(output_html)
- end
-
- def set_up_html(erb_file, output_file)
- return if File.exist?(output_file)
- template_contents = File.open(erb_file, 'r').read
- erb = ERB.new(template_contents, 0, '>')
- File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
- end
-
- def generate_json
- mutex_json.synchronize do
- row = { idx: @idx, overall_score: @overall_score,
- definition: @prediction_def, no_hits: @nr_hits }
- row = create_validation_hashes(row)
- write_row_json(row)
- @config[:json_output] << row
- end
- end
-
- def create_validation_hashes(row)
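+ # Adds one entry per validation to row[:validations]: the basic info
+ # hash, extended with approach/explanation/conclusion whenever the
+ # validation did not end in a warning. Each entry appears to be keyed by
+ # the validation's short header (set in the lines elided from this hunk).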
+ def create_validation_hash(row)
row[:validations] = {}
@validations.each do |item|
val = add_basic_validation_info(item)
explain = add_explanation_data(item) if item.color != 'warning'
val.merge!(explain) if explain
@@ -104,11 +78,12 @@
row
end
def add_basic_validation_info(item)
{ header: item.header, description: item.description, status: item.color,
- print: item.print.gsub('&nbsp;', ' ') }
+ print: item.print.gsub('&nbsp;', ' '), run_time: item.run_time,
+ validation: item.validation }
end
def add_explanation_data(item)
{ approach: item.approach, explanation: item.explanation,
conclusion: item.conclusion }
@@ -122,140 +97,151 @@
aux1: g.aux1, aux2: g.aux2 }
end
graphs
end
- def write_row_json(row)
- row_json = File.join(@config[:plot_dir],
- "#{@config[:filename]}_#{@idx}.json")
- File.open(row_json, 'w') { |f| f.write(row.to_json) }
- end
+ class << self
+ def print_console_footer(overall_evaluation, opt)
+ # skip unless stdout output was requested and the summary is not hidden
+ return if opt[:hide_summary] ||
+ !(opt[:output_formats].include? 'stdout')
+ warn ''
+ warn "==> #{overall_evaluation.join("\n")}"
+ warn ''
+ end
- def self.write_json_file(array, json_file)
- File.open(json_file, 'w') { |f| f.write(array.to_json) }
- end
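+ ##
+ # Reduces the per-query JSON rows to the summary statistics used by the
+ # final report: score quartiles, error and run-time tallies, and the
+ # number of queries with fewer than +min_blast_hits+ BLAST hits.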
+ def generate_overview(json_data, min_blast_hits)
+ scores_from_json = json_data.map { |e| e[:overall_score] }
+ quartiles = scores_from_json.all_quartiles
+ nee = calculate_no_queries_with_no_evidence(json_data)
+ no_mafft = count_mafft_errors(json_data)
+ no_internet = count_internet_errors(json_data)
+ errors = map_errors(json_data)
+ run_time = calculate_run_time(json_data)
+ min_hits = json_data.count { |e| e[:no_hits] < min_blast_hits }
+ overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
+ errors, run_time, min_hits)
+ end
- ##
- # Method that closes the gas in the html file and writes the overall
- # evaluation
- # Param:
- # +all_query_outputs+: array with +ValidationTest+ objects
- # +html_path+: path of the html folder
- # +filemane+: name of the fasta input file
- def self.print_footer(overview, config)
- set_overall_evaluation(overview, config)
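+ # Packages the summary statistics; scores of 75 and above count as good
+ # predictions, lower scores as possibly weak ones.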
+ def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
+ map_errors, run_time, insufficient_BLAST_hits)
+ {
+ scores: scores_from_json,
+ no_queries: scores_from_json.length,
+ good_scores: scores_from_json.count { |s| s >= 75 },
+ bad_scores: scores_from_json.count { |s| s < 75 },
+ nee: nee, no_mafft: no_mafft, no_internet: no_internet,
+ map_errors: map_errors, run_time: run_time,
+ first_quartile_of_scores: quartiles[0],
+ second_quartile_of_scores: quartiles[1],
+ third_quartile_of_scores: quartiles[2],
+ insufficient_BLAST_hits: insufficient_BLAST_hits
+ }
+ end
- footer_erb = File.join(config[:aux], 'template_footer.erb')
+ # Number of queries for which every validation returned a warning
+ # (i.e. no evidence either way).
+ def calculate_no_queries_with_no_evidence(json_data)
+ json_data.count do |row|
+ row[:validations].all? { |_, h| h[:status] == 'warning' }
+ end
+ end
- no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
- template_file = File.open(footer_erb, 'r').read
- erb = ERB.new(template_file, 0, '>')
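+ # Number of queries whose validations report at least one 'Mafft error'
+ # (and, in the twin method below, 'Internet error').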
+ def count_mafft_errors(json_data)
+ json_data.count do |row|
+ row[:validations].any? { |_, h| h[:print] == 'Mafft error' }
+ end
+ end
- output_files = []
- (1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
+ def count_internet_errors(json_data)
+ json_data.count do |row|
+ row[:validations].any? { |_, h| h[:print] == 'Internet error' }
+ end
+ end
- (1..no_of_results_files).each do |i|
- results_html = File.join(config[:html_path], "results#{i}.html")
- File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
+ def map_errors(json_data)
+ errors = Hash.new(0)
+ json_data.each do |row|
+ e = row[:validations].map { |s, h| s if h[:validation] == 'error' }
+ e.compact.each { |err| errors[err] += 1 }
+ end
+ errors
end
- turn_off_sorting(config[:html_path]) if no_of_results_files > 1
- end
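+ # Accumulates (total seconds, number of runs) per validation type.
+ # Pair1 appears to be a simple x/y pair defined elsewhere in
+ # GeneValidator; time_overview below derives the mean as x / y.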
+ def calculate_run_time(json_data)
+ run_time = Hash.new(Pair1.new(0, 0))
+ json_data.each do |row|
+ row[:validations].each do |short_header, v|
+ next if v[:run_time].nil? || v[:run_time].zero?
+ next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
+ p = Pair1.new(run_time[short_header.to_s].x + v[:run_time],
+ run_time[short_header.to_s].y + 1)
+ run_time[short_header.to_s] = p
+ end
+ end
+ run_time
+ end
- def self.set_overall_evaluation(overview, config)
- overall_evaluation = overview(overview)
- less = overall_evaluation[0].gsub("\n", '<br>').gsub("'", %q(\\\'))
+ ##
+ # Builds an overall textual evaluation of the output
+ # Params:
+ # +overview+: Hash of summary statistics from +generate_overview+
+ # Output:
+ # Array of Strings with the reports
+ def generate_evaluation_text(overview)
+ general_eval = general_overview(overview)
+ error_eval = errors_overview(overview)
+ time_eval = time_overview(overview)
- eval = print_summary_to_console(overall_evaluation, config[:summary])
- evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
+ [general_eval, error_eval, time_eval].reject(&:empty?)
+ end
- create_overview_json(overview[:scores], config[:plot_dir], less,
- evaluation)
- end
+ private
- def self.turn_off_sorting(html_path)
- script_file = File.join(html_path,
- 'files/js/genevalidator.compiled.min.js')
- content = File.read(script_file).gsub(',initTableSorter(),', ',')
- File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
- FileUtils.mv("#{script_file}.tmp", script_file)
- end
+ def general_overview(o)
+ good_pred = o[:good_scores] == 1 ? 'One' : o[:good_scores].to_s
+ bad_pred = o[:bad_scores] == 1 ? 'One' : o[:bad_scores].to_s
- def self.print_summary_to_console(overall_evaluation, summary)
- # print to console
- eval = ''
- overall_evaluation.each { |e| eval << "#{e}\n" }
- $stderr.puts eval if summary
- $stderr.puts ''
- eval
- end
+ plural = o[:insufficient_BLAST_hits] == 1 ? 'prediction was' : 'predictions were'
+ b = "#{o[:insufficient_BLAST_hits]} #{plural} not evaluated due to an" \
+ ' insufficient number of BLAST hits.'
+ blast_hits = o[:insufficient_BLAST_hits].zero? ? '' : b
- # make the historgram with the resulted scores
- def self.create_overview_json(scores, plot_dir, less, evaluation)
- plot_file = File.join(plot_dir, 'overview.json')
- data = [scores.group_by { |a| a }.map { |k, vs| { 'key' => k, 'value' => vs.length, 'main' => false } }]
- hash = { data: data, type: :simplebars,
- title: 'Overall GeneValidator Score Evaluation',
- footer: '', xtitle: 'Validation Score',
- ytitle: 'Number of Queries', aux1: 10, aux2: '', less: less,
- evaluation: evaluation }
- File.open(plot_file, 'w') { |f| f.write hash.to_json }
- end
-
- ##
- # Calculates an overall evaluation of the output
- # Params:
- # +all_query_outputs+: Array of +ValidationTest+ objects
- # Output
- # Array of Strigs with the reports
- def self.overview(o)
- eval = general_overview(o)
- error_eval = errors_overview(o)
- time_eval = time_overview(o)
-
- overall_evaluation = [eval, error_eval, time_eval]
- overall_evaluation.select { |e| e != '' }
- end
-
- def self.general_overview(o)
- good_pred = (o[:good_scores] == 1) ? 'One' : "#{o[:good_scores]} are"
- bad_pred = (o[:bad_scores] == 1) ? 'One' : "#{o[:bad_scores]} are"
-
- eval = "Overall Query Score Evaluation:\n" \
- "#{o[:no_queries]} predictions were validated, from which there" \
- " were:\n" \
- "#{good_pred} good prediction(s),\n" \
- "#{bad_pred} possibly weak prediction(s).\n"
-
- if o[:nee] != 0 # nee = no evidence
- eval << "#{o[:nee]} could not be evaluated due to the lack of" \
- ' evidence.'
+ ['Overall Query Score Evaluation:',
+ "#{o[:no_queries]} predictions were validated, from which there were:",
+ "#{good_pred} good prediction(s),",
+ "#{bad_pred} possibly weak prediction(s).", blast_hits,
+ "The median overall score was #{o[:second_quartile_of_scores]} with" \
+ " an upper quartile of #{o[:third_quartile_of_scores]}" \
+ " and a lower quartile of #{o[:first_quartile_of_scores]}."]
end
- eval
- end
- # errors per validation
- def self.errors_overview(o)
- error_eval = ''
- o[:map_errors].each do |k, v|
- error_eval << "\nWe couldn't run #{k} Validation for #{v} queries"
+ # errors per validation
+ def errors_overview(o)
+ error_eval = o[:map_errors].map do |k, v|
+ "We couldn't run #{k} Validation for #{v} queries"
+ end
+ if o[:no_mafft] >= (o[:no_queries] - o[:nee])
+ error_eval << "We couldn't run MAFFT multiple alignment"
+ end
+ if o[:no_internet] >= (o[:no_queries] - o[:nee])
+ error_eval << "\nWe couldn't make use of your internet connection"
+ end
+ error_eval
end
- if o[:no_mafft] >= (o[:no_queries] - o[:nee])
- error_eval << "\nWe couldn't run MAFFT multiple alignment"
- end
- if o[:no_internet] >= (o[:no_queries] - o[:nee])
- error_eval << "\nWe couldn't make use of your internet connection"
- end
- error_eval
- end
- def self.time_overview(o)
- time_eval = ''
- o[:run_time].each do |key, value|
- average_time = value.x / (value.y).to_f
- time_eval << "\nAverage running time for #{key} Validation:" \
- " #{average_time.round(3)}s per validation"
+ def time_overview(o)
+ o[:run_time].map do |key, value|
+ mean_time = value.x / value.y.to_f
+ "Average running time for #{key} Validation: #{mean_time.round(3)}s" \
+ ' per validation'
+ end
end
- time_eval
end
end
end