lib/assembly/contig_printer.rb in finishm-0.0.2 vs lib/assembly/contig_printer.rb in finishm-0.0.4
- old
+ new
@@ -32,10 +32,27 @@
attr_accessor :end_probe_read_length
# Enumerable of Enumerables of OrientedNode objects, each list of OrientedNode objects
# corresponds to a path that forms the connection
attr_accessor :paths
+
+ # Remove all except the path with maximal coverage from @paths: @paths becomes a one-element Array holding the path whose length_alone-weighted mean node coverage is highest. Does nothing when @paths is nil or empty.
+ def collapse_paths_to_maximal_coverage_path!
+ return if @paths.nil? or @paths.empty? # nothing to collapse; @paths is left untouched
+ get_coverage = lambda do |path| # mean coverage of a path's nodes, each weighted by its length_alone
+ numerator = 0 # running sum of coverage * length_alone
+ denominator = 0 # running sum of length_alone
+ path.each do |onode|
+ numerator += onode.node.coverage * onode.node.length_alone
+ denominator += onode.node.length_alone
+ end
+ numerator.to_f / denominator # to_f forces float division. NOTE(review): an empty path gives 0.0 / 0 (NaN), whose <=> is nil, so the max below may raise - confirm paths are never empty
+ end
+ @paths = [@paths.max do |path1, path2| # wrap the single winning path back into an Array
+ get_coverage.call(path1) <=> get_coverage.call(path2) # coverage is recomputed for both paths on every comparison
+ end]
+ end
end
# Given two contigs, return a consensus path and variants of the path.
#
# ----------> <-------- start and end probes (ends of probe sequences may not form part of final path). Directions not variable.
@@ -291,10 +308,10 @@
# end
def clustalo(sequences)
i = 0
stdin = sequences.collect{|s| i+=1; ">#{i}\n#{s}\n"}.join('')
- log.info "Running clustalo with #{sequences.length} sequences, specifically: #{stdin}" #if log.debug?
+ log.info "Running clustalo with #{sequences.length} sequences, specifically: #{stdin}" if log.debug?
stdout = Bio::Commandeer.run "clustalo -t DNA -i - --output-order=input-order", {:stdin => stdin, :log => log}
to_return = []
header = true
Bio::FlatFile.foreach(Bio::FastaFormat, StringIO.new(stdout)) do |seq|
to_return.push seq.seq.to_s