# Forward-declare the mixin so the constant exists before the file that
# populates it is loaded.
module SpecIDXML; end
require 'spec_id_xml'
require 'strscan'

# Describes a proteolytic enzyme by its cleavage specificity and performs
# in-silico digestion of amino acid sequences.
class SampleEnzyme
  include SpecIDXML

  # Enzyme names that #initialize knows how to configure. Each entry is also
  # the name of the instance method that sets the preset values.
  KNOWN_ENZYMES = %w[trypsin].freeze

  attr_accessor :name
  # amino acids after which to cleave
  attr_accessor :cut
  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
  attr_accessor :no_cut
  # 'C' or 'N'
  attr_accessor :sense

  # Currently, recognize:
  #   trypsin
  # For other enzymes, you must set :cut, :no_cut, :name, and :sense
  #
  # An unrecognized name is stored as-is and leaves :cut, :no_cut and :sense
  # nil so that they can be filled in by the caller.
  def initialize(name=nil)
    @sense = nil
    @cut = nil
    @no_cut = nil
    @name = name
    # Only dispatch to whitelisted presets: sending an arbitrary name would
    # raise NoMethodError for unknown enzymes and could invoke any method
    # (including private ones) that happens to share the name.
    send(@name.to_sym) if @name && self.class::KNOWN_ENZYMES.include?(@name.to_s)
  end

  # Configures this enzyme as trypsin: C-terminal cleavage after K or R,
  # blocked when the next residue is P.
  def trypsin
    @sense = 'C'
    @cut = 'KR'
    @no_cut = 'P'
  end

  # Serializes the enzyme as a sample_enzyme element wrapping a specificity
  # element. element_xml and short_element_xml are not defined in this file;
  # presumably they come from SpecIDXML -- verify there.
  def to_pepxml
    element_xml(:sample_enzyme, [:name]) do
      short_element_xml(:specificity, [:cut, :no_cut, :sense])
    end
  end

  # returns all peptides of missed cleavages <= 'missed_cleavages'
  # so 2 missed cleavages will return all no missed cleavage peptides
  # all 1 missed cleavages and all 2 missed cleavages.
  #
  # The result is ordered from the highest number of missed cleavages down to
  # zero. Raises ArgumentError if no cut residues have been set.
  #
  # NOTE: cleavage always happens AFTER a cut residue; the value of :sense is
  # not consulted here.
  def digest(string, missed_cleavages=0)
    peps = cleave(string)
    all_sets_of_peps = []
    # concat (rather than push with a splat) avoids expanding a potentially
    # very large array into an argument list.
    missed_cleavages.downto(0) do |num_mc|
      all_sets_of_peps.concat(get_missed_cleavages(peps, num_mc))
    end
    all_sets_of_peps
  end

  # takes an array of peptides and returns an array containing 'num' missed
  # cleavages
  # DOES NOT contain peptides that contain < num of missed cleavages
  # (i.e., will not return missed cleavages of 1 or 2 if num == 3)
  def get_missed_cleavages(tryptic_peps, num)
    # Each result joins num + 1 consecutive peptides; the range is empty when
    # there are fewer than num + 1 peptides available.
    (0...(tryptic_peps.size - num)).map do |i|
      tryptic_peps[i, num + 1].join
    end
  end

  # Convenience wrapper: digests 'string' with trypsin.
  def self.tryptic(string, missed_cleavages=0)
    new("trypsin").digest(string, missed_cleavages)
  end

  private

  # Splits 'string' into the peptides produced by complete digestion (no
  # missed cleavages), in sequence order.
  def cleave(string)
    if @cut.nil? || @cut.empty?
      raise ArgumentError, "no cut residues set: use a recognized enzyme name or set :cut"
    end
    cut_regex = Regexp.new("[#{@cut}]")
    # A nil/empty no_cut means nothing blocks cleavage; building a regexp
    # from it would fail with "empty char-class".
    no_cut_regex = (@no_cut.nil? || @no_cut.empty?) ? nil : Regexp.new("[#{@no_cut}]")

    scanner = StringScanner.new(string)
    peps = []
    pending = [] # fragments of the peptide currently being assembled
    until scanner.eos?
      chunk = scanner.scan_until(cut_regex)
      unless chunk
        # no further cut residue: everything left belongs to the last peptide
        pending << scanner.rest
        break
      end
      pending << chunk
      # a no_cut residue directly after the cut residue blocks cleavage
      next if no_cut_regex && scanner.check(no_cut_regex)
      peps << pending.join
      pending = []
    end
    peps << pending.join unless pending.empty?
    peps
  end
end