exe/big_simon in big_simon-0.1.1 vs exe/big_simon in big_simon-0.2.0

- old
+ new

@@ -52,11 +52,12 @@ "--threads must be > 0" programs = [ "WIsH", "VirHostMatcher", - "mummer" + "mummer", + "homology", ] outdir = opts[:outdir] threads = opts[:threads] virus_fnames = opts[:viruses] @@ -69,127 +70,96 @@ tmpdir_host = File.join tmpdir, "host" # all_predictions_fname = File.join outdir, "scores_all.txt" mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt" -# virus_recs, host_recs = [], [] -# Tempfile.open do |vir_f| -# Tempfile.open do |host_f| -# virus_fnames.each do |fname| -# ParseFasta::SeqFile.open(fname).each_record do |rec| -# vir_f.puts rec -# -# vir_f.puts ">#{rec.id}___reverse\n#{rec.seq.reverse}" -# end -# end -# -# host_fnames.each do |fname| -# ParseFasta::SeqFile.open(fname).each_record do |rec| -# host_f.puts rec -# host_f.puts ">#{rec.id}___reverse\n#{rec.seq.reverse}" -# end -# end -# -# vir_f.fsync -# host_f.fsync -# -# cmd = "mummer -maxmatch -l 15 #{host_f.path} #{vir_f.path} > /Users/moorer/Desktop/mummer.OUT" -# Process.run_and_time_it! "MUMMER", cmd -# end -# end -# -# header = nil -# hits = [] -# hit_info = {} -# virus = nil -# -# File.open("/Users/moorer/Desktop/mummer.OUT", "rt").each_line.with_index do |line, idx| -# if line.start_with? '>' -# virus = line.chomp.sub(/^>/, "").sub(/___reverse$/, "").strip -# -# unless hit_info.has_key? virus -# hit_info[virus] = {} -# end -# else -# host, _, _, len = line.chomp.strip.split(" ") -# host = host.sub(/___reverse$/, "").strip -# -# unless hit_info[virus].has_key? host -# hit_info[virus][host] = -1 -# end -# -# hit_info[virus][host] = len.to_i if len.to_i > hit_info[virus][host] -# end -# end -# -# puts -# -# hh = hit_info.map do |virus, info| -# [virus, info.to_a.sort_by {|gen, len| len}.reverse] -# end -# -# pp hh - -# hh = hit_info.map do |virus, info| -# [virus, info.to_a.sort_by { |host, hit_len| hit_len }.reverse -# -# end -# p hit_info - -scores_files = {} -programs.each do |program| - raw_fname = File.join outdir, "scores_raw.#{program}.txt" - scaled_fname = File.join outdir, "scores_scaled.#{program}.txt" - - scores_files[program] = { - raw: File.open(raw_fname, "w"), - scaled: File.open(scaled_fname, "w") - } -end - -scores_files.each do |program, files| - files.each do |name, file| - file.puts %w[virus host score].join "\t" - end -end - name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus" name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host" wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir # TODO separate the parser from the runner for mummer. host_info_mummer = BigSimon::Runners.mummer BigSimon::MUMMER, tmpdir_virus, tmpdir_host, tmpdir, threads +puts "mummer" +pp host_info_mummer +puts + +# TODO separate the parser from the runner for homology +host_info_homology = BigSimon::Runners.homology tmpdir_virus, tmpdir_host, tmpdir, threads + +puts "homology" +pp host_info_homology +puts + + + host_info_wish = BigSimon::Parsers.wish wish_outf host_info_vhm = BigSimon::Parsers.vir_host_matcher vhm_outf -host_info_simple_names = BigSimon::Pipeline.collate_host_results [host_info_wish, host_info_vhm, host_info_mummer], programs +puts "wish" +pp host_info_wish +puts + +puts "vhm" +pp host_info_vhm +puts + + +host_info_simple_names = BigSimon::Pipeline.collate_host_results [host_info_wish, host_info_vhm, host_info_mummer, host_info_homology], programs host_info = BigSimon::Pipeline.map_taxa host_info_simple_names, name_map_virus, name_map_host puts +pp host_info_simple_names +puts + +puts pp host_info puts # Just a basic all info file # File.open all_predictions_fname, "w" do |f| # f.puts %w[virus host program score scaled.score].join "\t" +scores_files = {} +programs.each do |program| + raw_fname = File.join outdir, "scores_raw.#{program}.txt" + scaled_fname = File.join outdir, "scores_scaled.#{program}.txt" + + scores_files[program] = { + raw: File.open(raw_fname, "w"), + scaled: File.open(scaled_fname, "w") + } +end + +scores_files.each do |program, files| + files.each do |name, file| + puts "LALA 3: #{[program, name, file]} #{%w[virus host score].join}" + + file.puts %w[virus host score].join "\t" + end +end + host_info.each do |virus, h1| h1.each do |host, h2| lines = {} h2[:scores].each do |program, score| lines[[virus, host, program]] = [score] + puts "LALA: #{[virus, host, score]}" + scores_files[program][:raw].puts [virus, host, score].join "\t" end # Add in the scaled score too. h2[:scaled_scores].each do |program, score| lines[[virus, host, program]] << score + + puts "LALA 2: #{[virus, host, score]}" + scores_files[program][:scaled].puts [virus, host, score].join "\t" end # lines.each do |(virus, host, program), (score, scaled_score)|