Sha256: 75ed35335fca524faf39555be4173d99bef682baa0cce9f6130989568cd5b704

Contents?: true

Size: 1.47 KB

Versions: 1

Compression:

Stored size: 1.47 KB

Contents

require 'bioinform/support'
require 'bioinform/data_models/pm'
module Bioinform
  # A PM subclass that adds score statistics and word scoring.
  #
  # NOTE(review): +each_position+, +matrix+, +length+, +probability+ and
  # +IndexByLetter+ are not defined in this file; presumably they are
  # provided by PM -- confirm against bioinform/data_models/pm.
  class PWM < PM
    # Adds up, over all positions, the probability-weighted sum of the
    # entries of that position (each entry is multiplied by
    # +probability[letter]+ for its index).
    #
    # @return [Numeric] the accumulated weighted sum
    def score_mean
      each_position.reduce(0) do |total, column|
        weighted = column.each_index.reduce(0) do |acc, letter|
          acc + column[letter] * probability[letter]
        end
        total + weighted
      end
    end

    # Adds up, over all positions, the probability-weighted sum of squared
    # entries minus the square of the probability-weighted sum of entries.
    #
    # @return [Numeric] the accumulated per-position spread
    def score_variance
      each_position.reduce(0) do |total, column|
        squares = column.each_index.reduce(0) do |acc, letter|
          acc + column[letter]**2 * probability[letter]
        end
        plain = column.each_index.reduce(0) do |acc, letter|
          acc + column[letter] * probability[letter]
        end
        # Keep the left-to-right order (total + squares) - plain**2 so that
        # floating-point results are not perturbed.
        total + squares - plain**2
      end
    end

    # Estimates a score threshold for the given P-value as
    # +score_mean + n * sigma+, where +sigma+ is the square root of
    # +score_variance+ and +n = inverf(1 - 2 * pvalue) * sqrt(2)+.
    #
    # NOTE(review): +Math.inverf+ is not part of Ruby's standard Math module;
    # presumably it is added by bioinform/support -- confirm.
    #
    # @param pvalue [Numeric] the P-value to convert into a threshold
    # @return [Numeric] the estimated threshold
    def threshold_gauss_estimation(pvalue)
      deviation = Math.sqrt(score_variance)
      quantile = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
      score_mean + quantile * deviation
    end

    # Scores a word by summing, for each position, the matrix entry selected
    # by the letter found at that position. The word is upcased first.
    #
    # TODO: need support of N-letters and other IUPAC codes.
    #
    # @param word [String] word to score; must be as long as the matrix
    # @return [Numeric] sum of the selected matrix entries
    # @raise [ArgumentError] if the word length differs from +length+, or if
    #   looking up any letter's matrix entry fails
    def score(word)
      word = word.upcase
      unless word.length == length
        raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix'
      end

      contributions = (0...length).map do |pos|
        begin
          matrix[pos][IndexByLetter[word[pos]]]
        rescue
          # Any failure of the lookup above is reported as a bad letter.
          raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'
        end
      end
      contributions.inject(&:+)
    end

    # Conversion hook: the receiver is already a PWM, so it is returned as is.
    #
    # @return [PWM] self
    def to_pwm
      self
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
bioinform-0.1.5 lib/bioinform/data_models/pwm.rb