lib/genomer-plugin-summary/metrics.rb in genomer-plugin-summary-0.0.3 vs lib/genomer-plugin-summary/metrics.rb in genomer-plugin-summary-0.0.4

- old
+ new

@@ -1,15 +1,17 @@
 require 'genomer'
+require 'genomer-plugin-summary/enumerators'
 require 'lazing'

 module GenomerPluginSummary::Metrics
+  include GenomerPluginSummary::Enumerators

   ALL = :all

   def gc_content(type,scfd)
-    gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
-    atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
+    gc = enumerator_for(type,scfd).mapping{|i| gc(i[:sequence])}.inject(:+) || 0.0
+    atgc = enumerator_for(type,scfd).mapping{|i| atgc(i[:sequence])}.inject(:+) || 0.0
     gc / atgc * 100
   end

   def count(type,scfd)
     enumerator_for(type,scfd).count
@@ -19,23 +21,35 @@
     length(type,scfd) / length(ALL,scfd).to_f * 100
   end

   def length(type,scfd)
     enumerator_for(type,scfd).
-      mapping(&:sequence).
+      mapping{|i| i[:sequence]}.
       mapping(&:length).
       inject(:+) || 0
   end

-  def gc(entry)
-    entry.sequence.gsub(/[^GCgc]/,'').length.to_f
+  def gc(sequence)
+    sequence.gsub(/[^GCgc]/,'').length.to_f
   end

-  def atgc(entry)
-    entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
+  def atgc(sequence)
+    sequence.gsub(/[^ATGCatgc]/,'').length.to_f
   end

-  def enumerator_for(type,scaffold)
-    scaffold.selecting{|i| [ALL,i.entry_type].include? type }
+  def sequence_total(seqs)
+    return Hash[[:start, :stop, :size, :percent, :gc].map{|i| [i, 0]}] if seqs.empty?
+
+    totals = seqs.inject({:size => 0, :percent => 0, :gc => 0}) do |hash,entry|
+      hash[:start] ||= entry[:start]
+      hash[:stop] = entry[:stop]
+      hash[:size] += entry[:size]
+      hash[:percent] += entry[:percent]
+      hash[:gc] += entry[:gc] * entry[:size]
+
+      hash
+    end
+    totals[:gc] /= totals[:size]
+    totals
   end

 end