Sha256: 19372011c839342384232b135b09e071dc617964ed7c3b86b845146fe5e9bd50

Contents?: true

Size: 1.59 KB

Versions: 2

Compression:

Stored size: 1.59 KB

Contents

require 'tap/task'
require 'ms/in_silico/digester'

module Ms
  module InSilico
    # :startdoc::task digest a protein sequence into peptides
    # Digest a protein sequence into an array of peptides.
    #
    #   % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
    #   MIVIGR
    #   SIVHPYITNEYEPFAAEK
    #   QQILSIMAG
    #
    class Digest < Tap::Task

      config :digester, 'Trypsin'                # The name of the digester
      config :min_length, nil, &c.integer_or_nil # Minimum peptide length
      config :max_length, nil, &c.integer_or_nil # Maximum peptide length
      config :max_misses, 0, &c.integer          # The max # of missed cleavage sites
      config :site_digest, false, &c.boolean     # Digest to sites (rather than sequences)

      # Digests +sequence+ using the digester named by the +digester+ config
      # and returns the resulting collection, after removing entries whose
      # length falls outside the optional +min_length+/+max_length+ bounds.
      #
      # +sequence+ may be a bare sequence or a FASTA entry (a '>' header
      # line followed by the sequence); all whitespace is removed before
      # digestion. The string passed in is never modified.
      #
      # Raises ArgumentError if no digester is registered under the
      # configured name.
      def process(sequence)
        d = Digester[digester]
        raise ArgumentError, "unknown digester: #{digester}" unless d

        # extract sequence from FASTA entries (everything after the header line)
        sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m

        # Strip whitespace into a new string rather than in place, so the
        # caller's (possibly frozen) string is left untouched.
        sequence = sequence.gsub(/\s/, "")

        peptides = if site_digest
          d.site_digest(sequence, max_misses)
        else
          d.digest(sequence, max_misses)
        end

        # filter by length; a nil bound disables that filter
        peptides.delete_if { |peptide| peptide.length < min_length } if min_length
        peptides.delete_if { |peptide| peptide.length > max_length } if max_length

        # Log a short preview of the sequence, adding an ellipsis only when
        # the preview actually truncates it.
        preview = sequence[0..10]
        ellipsis = sequence.length > preview.length ? '...' : ''
        log 'digest', "#{preview}#{ellipsis} to #{peptides.length} peptides"
        peptides
      end

    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
ms-in_silico-0.4.0 lib/ms/in_silico/digest.rb
ms-in_silico-0.3.0 lib/ms/in_silico/digest.rb