lib/genomer-plugin-summary/metrics.rb in genomer-plugin-summary-0.0.3 vs lib/genomer-plugin-summary/metrics.rb in genomer-plugin-summary-0.0.4
- old
+ new
@@ -1,15 +1,17 @@
require 'genomer'
+require 'genomer-plugin-summary/enumerators'
require 'lazing'
module GenomerPluginSummary::Metrics
+ include GenomerPluginSummary::Enumerators
ALL = :all
def gc_content(type,scfd)
- gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
- atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
+ gc = enumerator_for(type,scfd).mapping{|i| gc(i[:sequence])}.inject(:+) || 0.0
+ atgc = enumerator_for(type,scfd).mapping{|i| atgc(i[:sequence])}.inject(:+) || 0.0
gc / atgc * 100
end
def count(type,scfd)
enumerator_for(type,scfd).count
@@ -19,23 +21,35 @@
length(type,scfd) / length(ALL,scfd).to_f * 100
end
def length(type,scfd)
enumerator_for(type,scfd).
- mapping(&:sequence).
+ mapping{|i| i[:sequence]}.
mapping(&:length).
inject(:+) || 0
end
- def gc(entry)
- entry.sequence.gsub(/[^GCgc]/,'').length.to_f
+ def gc(sequence)
+ sequence.gsub(/[^GCgc]/,'').length.to_f
end
- def atgc(entry)
- entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
+ def atgc(sequence)
+ sequence.gsub(/[^ATGCatgc]/,'').length.to_f
end
- def enumerator_for(type,scaffold)
- scaffold.selecting{|i| [ALL,i.entry_type].include? type }
+ def sequence_total(seqs)
+ return Hash[[:start, :stop, :size, :percent, :gc].map{|i| [i, 0]}] if seqs.empty?
+
+ totals = seqs.inject({:size => 0, :percent => 0, :gc => 0}) do |hash,entry|
+ hash[:start] ||= entry[:start]
+ hash[:stop] = entry[:stop]
+ hash[:size] += entry[:size]
+ hash[:percent] += entry[:percent]
+ hash[:gc] += entry[:gc] * entry[:size]
+
+ hash
+ end
+ totals[:gc] /= totals[:size]
+ totals
end
end