lib/big_simon/runners.rb in big_simon-0.2.0 vs lib/big_simon/runners.rb in big_simon-0.2.1

- old
+ new

@@ -4,11 +4,11 @@ module BigSimon class Runners # @note To match the other things, you'd like them to be key'd on the file name. - def self.mummer exe, vir_dir, host_dir, outdir, threads + def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths klass = Class.new.extend Rya::CoreExtensions::Math FileUtils.mkdir_p outdir mummer_outfname = File.join outdir, "mummer_out.txt" @@ -78,14 +78,21 @@ # end else ary = line.strip.split " " host = ary[0].sub(/___reverse$/, "").strip - score = ary[3].to_i Rya::AbortIf.assert hit_table[virus].has_key?(host) + Rya::AbortIf.assert all_seq_lengths[virus] + Rya::AbortIf.assert all_seq_lengths[host] + + combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host] + + score = ary[3].to_i / combined_seq_length * 1000 + + # unless hit_table[virus].has_key? host # hit_table[virus][host] = -1 # end # We only want the longest hit. @@ -211,11 +218,11 @@ # # @note I will make the specified outdir if it doesn't exist. # @note Assumes that the files end with *.fa # @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program. # @todo assert that fname thing matches sequence ID name. - def self.homology vir_dir, host_dir, outdir, threads + def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths FileUtils.mkdir_p outdir host_orfs = File.join outdir, "host_orfs.homology" host_orfs_blast_db = host_orfs + ".blast_db.homology" @@ -270,10 +277,17 @@ host_id = ary[1].sub(/_[0-9]+$/, "") score = ary[11].to_f Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}" + Rya::AbortIf.assert all_seq_lengths[vir_id] + Rya::AbortIf.assert all_seq_lengths[host_id] + + combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id] + score = score / combined_seq_length.to_f * 1000 + + blast_table[vir_id][host_id] += score end # Remove blast file # FileUtils.rm_r blast_results if File.exist? blast_results @@ -307,10 +321,13 @@ blast_info.each do |simple_vir_name, blast_table| blast_table.each do |vir_id, host_scores| collated_blast_table[vir_id] = [] host_simple_names.each do |host_id| - scaled_score = klass.scale host_scores[host_id], 0, max_score, 1, 0 + + + combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id] + scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0 host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score } collated_blast_table[vir_id] << host_table end end