lib/ms/in_silico/digest.rb in ms-in_silico-0.2.2 vs lib/ms/in_silico/digest.rb in ms-in_silico-0.2.3
- old
+ new
@@ -3,30 +3,47 @@
module Ms
module InSilico
# Ms::InSilico::Digest::manifest digest a protein sequence into peptides
# Digest a protein sequence into an array of peptides.
#
- # % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --: dump --no-audit
+ # % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --:i dump
# I[14:37:55] digest MIVIGRSIVHP... to 3 peptides
- # # date: 2008-09-15 14:37:55
- # ---
- # ms/in_silico/digest (23483900):
- # - - MIVIGR
- # - SIVHPYITNEYEPFAAEK
- # - QQILSIMAG
+ # MIVIGR
+ # SIVHPYITNEYEPFAAEK
+ # QQILSIMAG
#
class Digest < Tap::Task
# Configuration declarations. Lines marked '-' are the 0.2.2 text and lines
# marked '+' are the 0.2.3 text, which adds min_length and max_length (both
# defaulting to nil).
- config :digester, 'Trypsin' # the name of the digester
- config :max_misses, 0, &c.integer # the max # of missed cleavage sites
- config :site_digest, false, &c.boolean # digest to sites (rather than sequences)
-
+ config :digester, 'Trypsin' # The name of the digester
+ config :min_length, nil, &c.integer_or_nil # Minimum peptide length
+ config :max_length, nil, &c.integer_or_nil # Maximum peptide length
+ config :max_misses, 0, &c.integer # The max # of missed cleavage sites
+ config :site_digest, false, &c.boolean # Digest to sites (rather than sequences)
+
# Digests +sequence+ using the digester looked up by the +digester+ config
# and returns the resulting collection (the result of d.site_digest when
# site_digest is set, otherwise of d.digest).
#
# sequence:: the protein sequence; in the '+' lines it may also be a FASTA
#            entry, i.e. a first line starting with '>' followed by the
#            sequence
#
# Raises ArgumentError when Digester[digester] is nil or false.
# Writes one 'digest' log entry per call.
def process(sequence)
# Look up the digester by name; d is used for the digest calls below.
unless d = Digester[digester]
raise ArgumentError, "unknown digester: #{digester}"
end
- peptides = site_digest ? d.site_digest(sequence, max_misses): d.digest(sequence, max_misses)
+ # extract sequence from FASTA entries
# When the input starts with '>', everything up to the first newline is
# dropped; the /m flag lets (.*) capture across the remaining lines.
+ sequence = $1 if sequence =~ /\A>.*?\n(.*)\z/m
# Strip all whitespace, including the line breaks inside a FASTA body.
# NOTE(review): gsub! edits the string in place. When the pattern above did
# not match, this is still the caller's object, so the argument is mutated
# (and a frozen input can raise) -- confirm callers tolerate this.
+ sequence.gsub!(/\s/, "")
+
+ peptides = if site_digest
+ d.site_digest(sequence, max_misses)
+ else
+ d.digest(sequence, max_misses)
+ end
+
+ # filter
# Each delete_if removes entries from peptides in place, and is skipped
# entirely when its bound is nil (the default for both configs).
+ peptides.delete_if do |peptide|
+ peptide.length < min_length
+ end if min_length
+
+ peptides.delete_if do |peptide|
+ peptide.length > max_length
+ end if max_length
+
# Log a preview of the sequence and the number of peptides kept.
# NOTE(review): [0..10] takes up to 11 characters while '...' is appended
# when length > 10, so an 11-character sequence is shown in full and still
# gets the ellipsis.
log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
peptides
end
end
\ No newline at end of file