#!/usr/bin/env ruby

# Command-line driver for BigSimon: parses options, validates the input
# fasta files, stages them in a temporary directory under the output
# directory, and starts the WIsH and VirHostMatcher runners.

# Exit the process when a SIGPIPE is received.
Signal.trap("PIPE", "EXIT")

# FileUtils is used directly below (mkdir_p / rm_r), so require it explicitly
# rather than relying on another library loading it as a side effect.
require "fileutils"
require "pp"
require "tempfile"

require "parse_fasta"
require "trollop"

require "big_simon"

# TODO make scaled scores with high score being better.

# NOTE(review): Rya is not required in this file; presumably it is loaded by
# big_simon -- confirm.
Process.extend Rya::CoreExtensions::Process

opts = Trollop.options do
  version BigSimon::VERSION_BANNER

  banner <<-EOS

#{BigSimon::VERSION_BANNER}

  Hi, I'm BigSimon! I'm here to help you figure out the hosts for your viruses.

  I run a bunch of different programs. In addition to doing some merging of results, I'll give you heatmaps for all the programs and you can check for yourself.

  The scaled scores run from 0 to 1 with lower scores being better.

  Options:
  EOS

  opt :viruses, "Path to fasta file(s) with viruses", type: :strings
  opt :hosts, "Path to fasta file(s) with hosts", type: :strings
  opt :outdir, "Output directory", default: "big_simon"
  opt :threads, "Number of threads to use", default: 1
end

Rya::AbortIf.logger.debug { "Command line opts: #{opts.inspect}" }

BigSimon::Utils.check_opt! opts, :viruses
BigSimon::Utils.check_opt! opts, :hosts

# Check infiles
[opts[:viruses], opts[:hosts]].flatten.each do |fname|
  BigSimon::Utils.check_file! fname
end

Rya::AbortIf.abort_unless opts[:threads] > 0, "--threads must be > 0"

# Program names; they are used later to label results and to build the
# per-program output file names.
programs = [
  "WIsH",
  "VirHostMatcher",
  "mummer",
  "homology",
]

outdir = opts[:outdir]
threads = opts[:threads]
virus_fnames = opts[:viruses]
host_fnames = opts[:hosts]

FileUtils.mkdir_p outdir

# Scratch space lives inside the output directory.
tmpdir = File.join outdir, "big_simon_tmp"
tmpdir_virus = File.join tmpdir, "virus"
tmpdir_host = File.join tmpdir, "host"

# all_predictions_fname = File.join outdir, "scores_all.txt"
mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"

# Each call returns three values: a name map, the ids, and the sequence
# lengths (as named by the variables below).
name_map_virus, all_ids_virus, vir_seq_lengths = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
name_map_host, all_ids_host, host_seq_lengths = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"

all_seq_lengths = vir_seq_lengths.merge host_seq_lengths

# The return values of these two runners are handed to the matching
# BigSimon::Parsers methods later in the script.
wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir

# TODO separate the parser from the runner for mummer.
# Run the remaining programs, collate every program's results, write the
# per-program raw/scaled score tables plus the mean scaled score table, clean
# up the temporary directory, and draw the heatmaps.

# Dumps an intermediate result through the debug logger (the same logger the
# command line options are reported with) instead of printing it to stdout.
log_dump = lambda do |label, obj|
  Rya::AbortIf.logger.debug { "#{label}:\n#{obj.pretty_inspect}" }
end

host_info_mummer = BigSimon::Runners.mummer BigSimon::MUMMER, tmpdir_virus, tmpdir_host, tmpdir, threads
log_dump.call "mummer", host_info_mummer

# TODO separate the parser from the runner for homology
host_info_homology = BigSimon::Runners.homology tmpdir_virus, tmpdir_host, tmpdir, threads
log_dump.call "homology", host_info_homology

host_info_wish = BigSimon::Parsers.wish wish_outf
host_info_vhm = BigSimon::Parsers.vir_host_matcher vhm_outf
log_dump.call "wish", host_info_wish
log_dump.call "vhm", host_info_vhm

# The results are listed in the same order as the names in `programs`
# (WIsH, VirHostMatcher, mummer, homology).
# NOTE(review): presumably collate_host_results pairs them up by position --
# confirm.
host_info_simple_names = BigSimon::Pipeline.collate_host_results(
  [host_info_wish, host_info_vhm, host_info_mummer, host_info_homology],
  programs
)
host_info = BigSimon::Pipeline.map_taxa host_info_simple_names, name_map_virus, name_map_host
log_dump.call "host_info_simple_names", host_info_simple_names
log_dump.call "host_info", host_info

# NOTE: a combined "all predictions" table (virus, host, program, score,
# scaled.score) used to be sketched here in commented-out code; it is
# currently disabled.

header = %w[virus host score].join "\t"

# program name => { raw: File, scaled: File }
scores_files = {}

begin
  programs.each do |program|
    # Register the hash before opening so that the ensure clause below can
    # close the first handle even when opening the second one fails.
    files = scores_files[program] = {}
    files[:raw] = File.open File.join(outdir, "scores_raw.#{program}.txt"), "w"
    files[:scaled] = File.open File.join(outdir, "scores_scaled.#{program}.txt"), "w"
  end

  scores_files.each_value do |files|
    files.each_value { |file| file.puts header }
  end

  host_info.each do |virus, hosts|
    hosts.each do |host, info|
      info[:scores].each do |program, score|
        scores_files[program][:raw].puts [virus, host, score].join "\t"
      end

      # Add in the scaled score too.
      info[:scaled_scores].each do |program, score|
        scores_files[program][:scaled].puts [virus, host, score].join "\t"
      end
    end
  end
ensure
  # Close every handle that was opened, even when a write above raised.
  scores_files.each_value { |files| files.each_value(&:close) }
end

# A file with mean scaled scores.
File.open mean_scaled_scores_fname, "w" do |f|
  f.puts header

  host_info.each do |virus, hosts|
    hosts.each do |host, info|
      scaled_scores = info[:scaled_scores].values

      # A pair without any scaled score has no mean; skip it rather than
      # crash on `nil / Float`.
      next if scaled_scores.empty?

      mean_scaled_score = scaled_scores.reduce(:+) / scaled_scores.length.to_f

      f.puts [virus, host, mean_scaled_score].join "\t"
    end
  end
end

FileUtils.rm_r tmpdir

# Make the heatmaps
BigSimon::Runners.heatmaps BigSimon::RSCRIPT, outdir, File.join(outdir, "heatmaps")