lib/genevalidator/output.rb in genevalidator-1.6.12 vs lib/genevalidator/output.rb in genevalidator-2.1.3

- old (genevalidator-1.6.12)
+ new (genevalidator-2.1.3)

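Summary of the change: 2.1.3 drops all ERB/HTML rendering from this class (template_query.erb, template_header.erb, template_footer.erb and the paginated results HTML files, along with the mutex_html/mutex_json locks), writes one JSON file per query while collecting the same rows in config[:json_output], and reworks the footer/summary code into class-level methods that build an overview hash and plain-text evaluation from that JSON data.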
@@ -1,17 +1,12 @@
-require 'erb'
-require 'fileutils'
 require 'forwardable'
 require 'json'

-require 'genevalidator/version'
-
 module GeneValidator
   class Output
     extend Forwardable
-    def_delegators GeneValidator, :opt, :config, :mutex, :mutex_html,
-                   :mutex_json
+    def_delegators GeneValidator, :opt, :config, :dirs, :mutex
     attr_accessor :prediction_def
     attr_accessor :nr_hits
     # list of +ValidationReport+ objects
     attr_accessor :validations
@@ -25,76 +20,55 @@
     ##
     # Initilizes the object
     # Params:
     # +current_idx+: index of the current query
     def initialize(current_idx, no_of_hits, definition)
-      @opt = opt
-      @config = config
+      @opt = opt
+      @dirs = dirs
+      @config = config
       @config[:run_no] += 1
+      output_dir = @dirs[:output_dir]
+      @output_filename = File.join(output_dir, "#{@dirs[:filename]}_results")
       @prediction_def = definition
       @nr_hits = no_of_hits
       @idx = current_idx
     end

     def print_output_console
+      return unless @opt[:output_formats].include? 'stdout'
+      c_fmt = "%3s\t%5s\t%20s\t%7s\t"
       mutex.synchronize do
-        print_console_header unless @config[:console_header_printed]
-        short_def = @prediction_def.scan(/([^ ]+)/)[0][0]
-        print format("%3s\t%5s\t%20s\t%7s\t", @idx, @overall_score, short_def,
-                     @nr_hits)
+        print_console_header(c_fmt)
+        short_def = @prediction_def.split(' ')[0]
+        print format(c_fmt, @idx, @overall_score, short_def, @nr_hits)
        puts validations.map(&:print).join("\t").gsub('&nbsp;', ' ')
       end
     end

-    def print_console_header
-      @config[:console_header_printed] = true
-      print format("%3s\t%5s\t%20s\t%7s\t", 'No', 'Score', 'Identifier',
-                   'No_Hits')
-      puts validations.map(&:short_header).join("\t")
+    def generate_json
+      fname = File.join(@dirs[:json_dir], "#{@dirs[:filename]}_#{@idx}.json")
+      row_data = { idx: @idx, overall_score: @overall_score,
+                   definition: @prediction_def, no_hits: @nr_hits }
+      row = create_validation_hash(row_data)
+      arr_idx = @idx - 1
+      @config[:json_output][arr_idx] = row
+      File.open(fname, 'w') { |f| f.write(row.to_json) }
     end

-    def generate_html
-      mutex_html.synchronize do
-        output_html = output_filename
-        query_erb = File.join(@config[:aux], 'template_query.erb')
-        template_file = File.open(query_erb, 'r').read
-        erb = ERB.new(template_file, 0, '>')
-        File.open(output_html, 'a') { |f| f.write(erb.result(binding)) }
-      end
-    end
+    private

-    def output_filename
-      idx = (@config[:run_no].to_f / @config[:output_max]).ceil
-      output_html = File.join(@config[:html_path], "results#{idx}.html")
-      write_html_header(output_html)
-      output_html
+    def print_console_header(c_fmt)
+      return if @config[:console_header_printed]
+      @config[:console_header_printed] = true
+      warn '==> Validating input sequences'
+      warn '' # blank line
+      print format(c_fmt, 'No', 'Score', 'Identifier', 'No_Hits')
+      puts validations.map(&:short_header).join("\t")
     end

-    def write_html_header(output_html)
-      head_erb = File.join(@config[:aux], 'template_header.erb')
-      set_up_html(head_erb, output_html) unless File.exist?(output_html)
-    end
-
-    def set_up_html(erb_file, output_file)
-      return if File.exist?(output_file)
-      template_contents = File.open(erb_file, 'r').read
-      erb = ERB.new(template_contents, 0, '>')
-      File.open(output_file, 'w+') { |f| f.write(erb.result(binding)) }
-    end
-
-    def generate_json
-      mutex_json.synchronize do
-        row = { idx: @idx, overall_score: @overall_score,
-                definition: @prediction_def, no_hits: @nr_hits }
-        row = create_validation_hashes(row)
-        write_row_json(row)
-        @config[:json_output] << row
-      end
-    end
-
-    def create_validation_hashes(row)
+    def create_validation_hash(row)
       row[:validations] = {}
       @validations.each do |item|
         val = add_basic_validation_info(item)
         explain = add_explanation_data(item) if item.color != 'warning'
         val.merge!(explain) if explain
@@ -104,11 +78,12 @@
       row
     end

     def add_basic_validation_info(item)
       { header: item.header, description: item.description, status: item.color,
-        print: item.print.gsub('&nbsp;', ' ') }
+        print: item.print.gsub('&nbsp;', ' '), run_time: item.run_time,
+        validation: item.validation }
     end

     def add_explanation_data(item)
       { approach: item.approach, explanation: item.explanation,
         conclusion: item.conclusion }
@@ -122,140 +97,151 @@
                     aux1: g.aux1, aux2: g.aux2 }
       end
       graphs
     end

-    def write_row_json(row)
-      row_json = File.join(@config[:plot_dir],
-                           "#{@config[:filename]}_#{@idx}.json")
-      File.open(row_json, 'w') { |f| f.write(row.to_json) }
-    end
+    class <<self
+      def print_console_footer(overall_evaluation, opt)
+        return unless (opt[:output_formats].include? 'stdout') ||
+                      opt[:hide_summary]
+        warn ''
+        warn "==> #{overall_evaluation.join("\n")}"
+        warn ''
+      end

-    def self.write_json_file(array, json_file)
-      File.open(json_file, 'w') { |f| f.write(array.to_json) }
-    end
+      def generate_overview(json_data, min_blast_hits)
+        scores_from_json = json_data.map { |e| e[:overall_score] }
+        quartiles = scores_from_json.all_quartiles
+        nee = calculate_no_quries_with_no_evidence(json_data)
+        no_mafft = count_mafft_errors(json_data)
+        no_internet = count_internet_errors(json_data)
+        map_errors = map_errors(json_data)
+        run_time = calculate_run_time(json_data)
+        min_hits = json_data.count { |e| e[:no_hits] < min_blast_hits }
+        overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
+                      map_errors, run_time, min_hits)
+      end

-    ##
-    # Method that closes the gas in the html file and writes the overall
-    # evaluation
-    # Param:
-    # +all_query_outputs+: array with +ValidationTest+ objects
-    # +html_path+: path of the html folder
-    # +filemane+: name of the fasta input file
-    def self.print_footer(overview, config)
-      set_overall_evaluation(overview, config)
+      def overview_hash(scores_from_json, quartiles, nee, no_mafft, no_internet,
+                        map_errors, run_time, insufficient_BLAST_hits)
+        {
+          scores: scores_from_json,
+          no_queries: scores_from_json.length,
+          good_scores: scores_from_json.count { |s| s >= 75 },
+          bad_scores: scores_from_json.count { |s| s < 75 },
+          nee: nee, no_mafft: no_mafft, no_internet: no_internet,
+          map_errors: map_errors, run_time: run_time,
+          first_quartile_of_scores: quartiles[0],
+          second_quartile_of_scores: quartiles[1],
+          third_quartile_of_scores: quartiles[2],
+          insufficient_BLAST_hits: insufficient_BLAST_hits
+        }
+      end

-      footer_erb = File.join(config[:aux], 'template_footer.erb')
+      # calculate number of queries that had warnings for all validations.
+      def calculate_no_quries_with_no_evidence(json_data)
+        all_warnings = 0
+        json_data.each do |row|
+          status = row[:validations].map { |_, h| h[:status] }
+          if status.count { |r| r == 'warning' } == status.length
+            all_warnings += 1
+          end
+        end
+        all_warnings
+      end

-      no_of_results_files = (config[:run_no].to_f / config[:output_max]).ceil
-      template_file = File.open(footer_erb, 'r').read
-      erb = ERB.new(template_file, 0, '>')
+      def count_mafft_errors(json_data)
+        json_data.count do |row|
+          num = row[:validations].count { |_, h| h[:print] == 'Mafft error' }
+          num.zero? ? false : true
+        end
+      end

-      output_files = []
-      (1..no_of_results_files).each { |i| output_files << "results#{i}.html" }
+      def count_internet_errors(json_data)
+        json_data.count do |row|
+          num = row[:validations].count { |_, h| h[:print] == 'Internet error' }
+          num.zero? ? false : true
+        end
+      end

-      (1..no_of_results_files).each do |i|
-        results_html = File.join(config[:html_path], "results#{i}.html")
-        File.open(results_html, 'a+') { |f| f.write(erb.result(binding)) }
+      def map_errors(json_data)
+        errors = Hash.new(0)
+        json_data.each do |row|
+          e = row[:validations].map { |s, h| s if h[:validation] == 'error' }
+          e.compact.each { |err| errors[err] += 1 }
+        end
+        errors
       end

-      turn_off_sorting(config[:html_path]) if no_of_results_files > 1
-    end
+      def calculate_run_time(json_data)
+        run_time = Hash.new(Pair1.new(0, 0))
+        json_data.map do |row|
+          row[:validations].each do |short_header, v|
+            next if v[:run_time].nil? || v[:run_time].zero?
+            next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
+            p = Pair1.new(run_time[short_header.to_s].x + v[:run_time],
+                          run_time[short_header.to_s].y + 1)
+            run_time[short_header.to_s] = p
+          end
+        end
+        run_time
+      end

-    def self.set_overall_evaluation(overview, config)
-      overall_evaluation = overview(overview)
-      less = overall_evaluation[0].gsub("\n", '<br>').gsub("'", %q(\\\'))
+      ##
+      # Calculates an overall evaluation of the output
+      # Params:
+      # +all_query_outputs+: Array of +ValidationTest+ objects
+      # Output
+      # Array of Strigs with the reports
+      def generate_evaluation_text(overview)
+        eval = general_overview(overview)
+        error_eval = errors_overview(overview)
+        time_eval = time_overview(overview)

-      eval = print_summary_to_console(overall_evaluation, config[:summary])
-      evaluation = eval.gsub("\n", '<br>').gsub("'", %q(\\\'))
+        [eval, error_eval, time_eval].reject(&:empty?)
+      end

-      create_overview_json(overview[:scores], config[:plot_dir], less,
-                           evaluation)
-    end
+      private

-    def self.turn_off_sorting(html_path)
-      script_file = File.join(html_path,
-                              'files/js/genevalidator.compiled.min.js')
-      content = File.read(script_file).gsub(',initTableSorter(),', ',')
-      File.open("#{script_file}.tmp", 'w') { |f| f.puts content }
-      FileUtils.mv("#{script_file}.tmp", script_file)
-    end
+      def general_overview(o)
+        good_pred = o[:good_scores] == 1 ? 'One' : "#{o[:good_scores]} are"
+        bad_pred = o[:bad_scores] == 1 ? 'One' : "#{o[:bad_scores]} are"

-    def self.print_summary_to_console(overall_evaluation, summary)
-      # print to console
-      eval = ''
-      overall_evaluation.each { |e| eval << "#{e}\n" }
-      $stderr.puts eval if summary
-      $stderr.puts ''
-      eval
-    end
+        plural = 'prediction was' if o[:insufficient_BLAST_hits] == 1
+        plural = 'predictions were' if o[:insufficient_BLAST_hits] >= 2
+        b = "#{o[:insufficient_BLAST_hits]} #{plural} not evaluated due to an" \
+            ' insufficient number of BLAST hits.'
+        blast_hits = o[:insufficient_BLAST_hits].zero? ? '' : b

-    # make the historgram with the resulted scores
-    def self.create_overview_json(scores, plot_dir, less, evaluation)
-      plot_file = File.join(plot_dir, 'overview.json')
-      data = [scores.group_by { |a| a }.map { |k, vs| { 'key' => k, 'value' => vs.length, 'main' => false } }]
-      hash = { data: data, type: :simplebars,
-               title: 'Overall GeneValidator Score Evaluation',
-               footer: '', xtitle: 'Validation Score',
-               ytitle: 'Number of Queries', aux1: 10, aux2: '', less: less,
-               evaluation: evaluation }
-      File.open(plot_file, 'w') { |f| f.write hash.to_json }
-    end
-
-    ##
-    # Calculates an overall evaluation of the output
-    # Params:
-    # +all_query_outputs+: Array of +ValidationTest+ objects
-    # Output
-    # Array of Strigs with the reports
-    def self.overview(o)
-      eval = general_overview(o)
-      error_eval = errors_overview(o)
-      time_eval = time_overview(o)
-
-      overall_evaluation = [eval, error_eval, time_eval]
-      overall_evaluation.select { |e| e != '' }
-    end
-
-    def self.general_overview(o)
-      good_pred = (o[:good_scores] == 1) ? 'One' : "#{o[:good_scores]} are"
-      bad_pred = (o[:bad_scores] == 1) ? 'One' : "#{o[:bad_scores]} are"
-
-      eval = "Overall Query Score Evaluation:\n" \
-             "#{o[:no_queries]} predictions were validated, from which there" \
-             " were:\n" \
-             "#{good_pred} good prediction(s),\n" \
-             "#{bad_pred} possibly weak prediction(s).\n"
-
-      if o[:nee] != 0 # nee = no evidence
-        eval << "#{o[:nee]} could not be evaluated due to the lack of" \
-                ' evidence.'
+        ['Overall Query Score Evaluation:',
+         "#{o[:no_queries]} predictions were validated, from which there were:",
+         "#{good_pred} good prediction(s),",
+         "#{bad_pred} possibly weak prediction(s).", blast_hits,
+         "The median overall score was #{o[:second_quartile_of_scores]} with" \
+         " an upper quartile of #{o[:third_quartile_of_scores]}" \
+         " and a lower quartile of #{o[:first_quartile_of_scores]}."]
       end
-      eval
-    end

-    # errors per validation
-    def self.errors_overview(o)
-      error_eval = ''
-      o[:map_errors].each do |k, v|
-        error_eval << "\nWe couldn't run #{k} Validation for #{v} queries"
+      # errors per validation
+      def errors_overview(o)
+        error_eval = o[:map_errors].map do |k, v|
+          "We couldn't run #{k} Validation for #{v} queries"
+        end
+        if o[:no_mafft] >= (o[:no_queries] - o[:nee])
+          error_eval << "We couldn't run MAFFT multiple alignment"
+        end
+        if o[:no_internet] >= (o[:no_queries] - o[:nee])
+          error_eval << "\nWe couldn't make use of your internet connection"
+        end
+        error_eval
       end
-      if o[:no_mafft] >= (o[:no_queries] - o[:nee])
-        error_eval << "\nWe couldn't run MAFFT multiple alignment"
-      end
-      if o[:no_internet] >= (o[:no_queries] - o[:nee])
-        error_eval << "\nWe couldn't make use of your internet connection"
-      end
-      error_eval
-    end

-    def self.time_overview(o)
-      time_eval = ''
-      o[:run_time].each do |key, value|
-        average_time = value.x / (value.y).to_f
-        time_eval << "\nAverage running time for #{key} Validation:" \
-                     " #{average_time.round(3)}s per validation"
+      def time_overview(o)
+        o[:run_time].map do |key, value|
+          mean_time = value.x / value.y.to_f
+          "Average running time for #{key} Validation: #{mean_time.round(3)}s" \
+          ' per validation'
+        end
       end
-      time_eval
     end
   end
 end
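A note on the new per-query output: generate_json writes one file per query into dirs[:json_dir], named after the input file and the query index, with the row shape built by row_data, create_validation_hash and add_basic_validation_info. A minimal sketch of reading one such file back, assuming a hypothetical file name input_fasta_1.json:

    require 'json'

    # Hypothetical name; real files follow "#{dirs[:filename]}_#{idx}.json"
    # inside dirs[:json_dir].
    row = JSON.parse(File.read('input_fasta_1.json'), symbolize_names: true)

    # Top-level keys come from row_data in generate_json.
    puts "#{row[:idx]}: #{row[:definition]} scored #{row[:overall_score]} " \
         "(#{row[:no_hits]} BLAST hits)"

    # :validations is keyed by short header; each entry carries the fields
    # from add_basic_validation_info (header, description, status, print,
    # run_time, validation).
    row[:validations].each do |short_header, v|
      puts "  #{short_header}: #{v[:status]} (#{v[:validation]})"
    end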
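generate_overview also calls scores_from_json.all_quartiles, an Array extension defined elsewhere in GeneValidator, not in this file. A rough stand-in, assuming it returns [lower quartile, median, upper quartile]; the gem's exact interpolation method may differ:

    # Assumption: all_quartiles => [Q1, median, Q3] for a non-empty array;
    # a midpoint-based sketch only, not GeneValidator's actual implementation.
    class Array
      def all_quartiles
        s = sort
        mid = ->(a) { (a[(a.size - 1) / 2] + a[a.size / 2]) / 2.0 }
        [mid.call(s[0...(s.size / 2)]), mid.call(s),
         mid.call(s[((s.size + 1) / 2)..-1])]
      end
    end

    p [100, 90, 75, 60, 40].all_quartiles # => [50.0, 75.0, 95.0]

The three values land in overview_hash as first_quartile_of_scores, second_quartile_of_scores and third_quartile_of_scores, which general_overview then reports as the lower quartile, median and upper quartile of the overall scores.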
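calculate_run_time accumulates, per validation type, the total run time and the number of queries it ran on in Pair1 values. Pair1 is defined elsewhere in GeneValidator; the code above only assumes x/y accessors and a two-argument constructor. A self-contained sketch of the same aggregation over hypothetical rows:

    # Assumption: Pair1 is equivalent to a plain two-field struct
    # (x = summed run time, y = sample count).
    Pair1 = Struct.new(:x, :y)

    # Hypothetical rows, shaped like the per-query :json_output entries.
    rows = [
      { validations: { lv: { run_time: 0.12, validation: 'yes' } } },
      { validations: { lv: { run_time: 0.18, validation: 'no' } } }
    ]

    run_time = Hash.new(Pair1.new(0, 0))
    rows.each do |row|
      row[:validations].each do |short_header, v|
        # Skip entries with no timing signal, as calculate_run_time does.
        next if v[:run_time].nil? || v[:run_time].zero?
        next if v[:validation] == 'unapplicable' || v[:validation] == 'error'
        acc = run_time[short_header.to_s]
        run_time[short_header.to_s] = Pair1.new(acc.x + v[:run_time], acc.y + 1)
      end
    end

    run_time.each do |key, pair|
      puts format('Average running time for %s Validation: %.3fs per validation',
                  key, pair.x / pair.y.to_f)
    end
    # => Average running time for lv Validation: 0.150s per validation

Keeping the sum and count separate (rather than a running mean) lets time_overview compute the mean once at report time; and since the default Pair1.new(0, 0) is only ever replaced, never mutated, sharing it as the Hash default is safe.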