lib/big_simon/runners.rb in big_simon-0.2.0 vs lib/big_simon/runners.rb in big_simon-0.2.1
- old
+ new
@@ -4,11 +4,11 @@
module BigSimon
class Runners
# @note To match the other things, you'd like them to be keyed on the file name.
- def self.mummer exe, vir_dir, host_dir, outdir, threads
+ def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths
klass = Class.new.extend Rya::CoreExtensions::Math
FileUtils.mkdir_p outdir
mummer_outfname = File.join outdir, "mummer_out.txt"
@@ -78,14 +78,21 @@
# end
else
ary = line.strip.split " "
host = ary[0].sub(/___reverse$/, "").strip
- score = ary[3].to_i
Rya::AbortIf.assert hit_table[virus].has_key?(host)
+ Rya::AbortIf.assert all_seq_lengths[virus]
+ Rya::AbortIf.assert all_seq_lengths[host]
+
+ combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host]
+
+ score = ary[3].to_i / combined_seq_length * 1000
+
+
# unless hit_table[virus].has_key? host
# hit_table[virus][host] = -1
# end
# We only want the longest hit.
@@ -211,11 +218,11 @@
#
# @note I will make the specified outdir if it doesn't exist.
# @note Assumes that the files end with *.fa
# @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program.
# @todo Assert that the file name matches the sequence ID.
- def self.homology vir_dir, host_dir, outdir, threads
+ def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths
FileUtils.mkdir_p outdir
host_orfs = File.join outdir, "host_orfs.homology"
host_orfs_blast_db = host_orfs + ".blast_db.homology"
@@ -270,10 +277,17 @@
host_id = ary[1].sub(/_[0-9]+$/, "")
score = ary[11].to_f
Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}"
+ Rya::AbortIf.assert all_seq_lengths[vir_id]
+ Rya::AbortIf.assert all_seq_lengths[host_id]
+
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
+ score = score / combined_seq_length.to_f * 1000
+
+
blast_table[vir_id][host_id] += score
end
# Remove blast file
# FileUtils.rm_r blast_results if File.exist? blast_results
@@ -307,10 +321,13 @@
blast_info.each do |simple_vir_name, blast_table|
blast_table.each do |vir_id, host_scores|
collated_blast_table[vir_id] = []
host_simple_names.each do |host_id|
- scaled_score = klass.scale host_scores[host_id], 0, max_score, 1, 0
+
+
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
+ scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0
host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score }
collated_blast_table[vir_id] << host_table
end
end