exe/big_simon in big_simon-0.1.1 vs exe/big_simon in big_simon-0.2.0
- old
+ new
@@ -52,11 +52,12 @@
"--threads must be > 0"
programs = [
"WIsH",
"VirHostMatcher",
- "mummer"
+ "mummer",
+ "homology",
]
outdir = opts[:outdir]
threads = opts[:threads]
virus_fnames = opts[:viruses]
@@ -69,127 +70,96 @@
tmpdir_host = File.join tmpdir, "host"
# all_predictions_fname = File.join outdir, "scores_all.txt"
mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"
-# virus_recs, host_recs = [], []
-# Tempfile.open do |vir_f|
-# Tempfile.open do |host_f|
-# virus_fnames.each do |fname|
-# ParseFasta::SeqFile.open(fname).each_record do |rec|
-# vir_f.puts rec
-#
-# vir_f.puts ">#{rec.id}___reverse\n#{rec.seq.reverse}"
-# end
-# end
-#
-# host_fnames.each do |fname|
-# ParseFasta::SeqFile.open(fname).each_record do |rec|
-# host_f.puts rec
-# host_f.puts ">#{rec.id}___reverse\n#{rec.seq.reverse}"
-# end
-# end
-#
-# vir_f.fsync
-# host_f.fsync
-#
-# cmd = "mummer -maxmatch -l 15 #{host_f.path} #{vir_f.path} > /Users/moorer/Desktop/mummer.OUT"
-# Process.run_and_time_it! "MUMMER", cmd
-# end
-# end
-#
-# header = nil
-# hits = []
-# hit_info = {}
-# virus = nil
-#
-# File.open("/Users/moorer/Desktop/mummer.OUT", "rt").each_line.with_index do |line, idx|
-# if line.start_with? '>'
-# virus = line.chomp.sub(/^>/, "").sub(/___reverse$/, "").strip
-#
-# unless hit_info.has_key? virus
-# hit_info[virus] = {}
-# end
-# else
-# host, _, _, len = line.chomp.strip.split(" ")
-# host = host.sub(/___reverse$/, "").strip
-#
-# unless hit_info[virus].has_key? host
-# hit_info[virus][host] = -1
-# end
-#
-# hit_info[virus][host] = len.to_i if len.to_i > hit_info[virus][host]
-# end
-# end
-#
-# puts
-#
-# hh = hit_info.map do |virus, info|
-# [virus, info.to_a.sort_by {|gen, len| len}.reverse]
-# end
-#
-# pp hh
-
-# hh = hit_info.map do |virus, info|
-# [virus, info.to_a.sort_by { |host, hit_len| hit_len }.reverse
-#
-# end
-# p hit_info
-
-scores_files = {}
-programs.each do |program|
- raw_fname = File.join outdir, "scores_raw.#{program}.txt"
- scaled_fname = File.join outdir, "scores_scaled.#{program}.txt"
-
- scores_files[program] = {
- raw: File.open(raw_fname, "w"),
- scaled: File.open(scaled_fname, "w")
- }
-end
-
-scores_files.each do |program, files|
- files.each do |name, file|
- file.puts %w[virus host score].join "\t"
- end
-end
-
name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir
# TODO: separate the parser from the runner for mummer.
host_info_mummer = BigSimon::Runners.mummer BigSimon::MUMMER, tmpdir_virus, tmpdir_host, tmpdir, threads
+puts "mummer"
+pp host_info_mummer
+puts
+
+# TODO: separate the parser from the runner for homology.
+host_info_homology = BigSimon::Runners.homology tmpdir_virus, tmpdir_host, tmpdir, threads
+
+puts "homology"
+pp host_info_homology
+puts
+
+
+
host_info_wish = BigSimon::Parsers.wish wish_outf
host_info_vhm = BigSimon::Parsers.vir_host_matcher vhm_outf
-host_info_simple_names = BigSimon::Pipeline.collate_host_results [host_info_wish, host_info_vhm, host_info_mummer], programs
+puts "wish"
+pp host_info_wish
+puts
+
+puts "vhm"
+pp host_info_vhm
+puts
+
+
+host_info_simple_names = BigSimon::Pipeline.collate_host_results [host_info_wish, host_info_vhm, host_info_mummer, host_info_homology], programs
host_info = BigSimon::Pipeline.map_taxa host_info_simple_names, name_map_virus, name_map_host
puts
+pp host_info_simple_names
+puts
+
+puts
pp host_info
puts
# Just a basic file containing all of the info
# File.open all_predictions_fname, "w" do |f|
# f.puts %w[virus host program score scaled.score].join "\t"
+scores_files = {}
+programs.each do |program|
+ raw_fname = File.join outdir, "scores_raw.#{program}.txt"
+ scaled_fname = File.join outdir, "scores_scaled.#{program}.txt"
+
+ scores_files[program] = {
+ raw: File.open(raw_fname, "w"),
+ scaled: File.open(scaled_fname, "w")
+ }
+end
+
+scores_files.each do |program, files|
+ files.each do |name, file|
+ puts "LALA 3: #{[program, name, file]} #{%w[virus host score].join}"
+
+ file.puts %w[virus host score].join "\t"
+ end
+end
+
host_info.each do |virus, h1|
h1.each do |host, h2|
lines = {}
h2[:scores].each do |program, score|
lines[[virus, host, program]] = [score]
+ puts "LALA: #{[virus, host, score]}"
+
scores_files[program][:raw].puts [virus, host, score].join "\t"
end
# Add in the scaled score too.
h2[:scaled_scores].each do |program, score|
lines[[virus, host, program]] << score
+
+ puts "LALA 2: #{[virus, host, score]}"
+
scores_files[program][:scaled].puts [virus, host, score].join "\t"
end
# lines.each do |(virus, host, program), (score, scaled_score)|