lib/assembly/contig_printer.rb in finishm-0.0.2 vs lib/assembly/contig_printer.rb in finishm-0.0.4

- old
+ new

@@ -32,10 +32,27 @@ attr_accessor :end_probe_read_length # Enumerable of Enumerables of OrientedNode objects, each list of OrientedNode objects # corresponds to a path that forms the connection attr_accessor :paths + + # Remove all except the path with maximal coverage from @paths + def collapse_paths_to_maximal_coverage_path! + return if @paths.nil? or @paths.empty? + get_coverage = lambda do |path| + numerator = 0 + denominator = 0 + path.each do |onode| + numerator += onode.node.coverage * onode.node.length_alone + denominator += onode.node.length_alone + end + numerator.to_f / denominator + end + @paths = [@paths.max do |path1, path2| + get_coverage.call(path1) <=> get_coverage.call(path2) + end] + end end # Given two contigs, return a consensus path and variants of the path. # # ----------> <-------- start and end probes (ends of probe sequences may not form part of final path). Directions not variable. @@ -291,10 +308,10 @@ # end def clustalo(sequences) i = 0 stdin = sequences.collect{|s| i+=1; ">#{i}\n#{s}\n"}.join('') - log.info "Running clustalo with #{sequences.length} sequences, specifically: #{stdin}" #if log.debug? + log.info "Running clustalo with #{sequences.length} sequences, specifically: #{stdin}" if log.debug? stdout = Bio::Commandeer.run "clustalo -t DNA -i - --output-order=input-order", {:stdin => stdin, :log => log} to_return = [] header = true Bio::FlatFile.foreach(Bio::FastaFormat, StringIO.new(stdout)) do |seq| to_return.push seq.seq.to_s