prosite.rb in bio-1.1.0

- old
+ new
@@ -1,526 +1,507 @@
 #
 # = bio/db/prosite.rb - PROSITE database class
 #
-# Copyright::  Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
-# Licence::    LGPL
+# Copyright::  Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
+# Licence::    Ruby's
 #
-# $Id: prosite.rb,v 0.13 2005/12/18 18:24:08 k Exp $
+# $Id: prosite.rb,v 0.16 2006/09/19 06:03:51 k Exp $
 #
-# == Description
-#
-#
-# == Example
-# == References
-#--
-#
-#  This library is free software; you can redistribute it and/or
-#  modify it under the terms of the GNU Lesser General Public
-#  License as published by the Free Software Foundation; either
-#  version 2 of the License, or (at your option) any later version.
-#
-#  This library is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#  Lesser General Public License for more details.
-#
-#  You should have received a copy of the GNU Lesser General Public
-#  License along with this library; if not, write to the Free Software
-#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
-#
-#++
-#
 
 require 'bio/db'
 
 module Bio
 
-  class PROSITE < EMBLDB
+class PROSITE < EMBLDB
 
-    # Delimiter
-    DELIMITER = "\n//\n"
+  # Delimiter
+  DELIMITER = "\n//\n"
 
-    # Delimiter
-    RS = DELIMITER
+  # Delimiter
+  RS = DELIMITER
 
-    # Bio::DB API
-    TAGSIZE = 5
+  # Bio::DB API
+  TAGSIZE = 5
 
 
-    def initialize(entry)
-      super(entry, TAGSIZE)
-    end
+  def initialize(entry)
+    super(entry, TAGSIZE)
+  end
 
 
-    # ID  Identification                     (Begins each entry; 1 per entry)
-    #
-    #  ID   ENTRY_NAME; ENTRY_TYPE.  (ENTRY_TYPE : PATTERN, MATRIX, RULE)
-    #
-    # Returns
-    def name
-      unless @data['ID']
-        @data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
-      end
-      @data['ID']
+  # ID  Identification                     (Begins each entry; 1 per entry)
+  #
+  #  ID   ENTRY_NAME; ENTRY_TYPE.  (ENTRY_TYPE : PATTERN, MATRIX, RULE)
+  #
+  # Returns the ENTRY_NAME part of the ID line; the ENTRY_TYPE part is cached too, for #division.
+  def name
+    unless @data['ID']
+      @data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
     end
+    @data['ID']
+  end
 
-    # Returns
-    def division
-      unless @data['TYPE']
-        name
-      end
-      @data['TYPE']
+  # Returns the ENTRY_TYPE part of the ID line (PATTERN, MATRIX or RULE), calling #name to parse it if needed.
+  def division
+    unless @data['TYPE']
+      name
     end
+    @data['TYPE']
+  end
 
 
-    # AC  Accession number                   (1 per entry)
-    #
-    #  AC   PSnnnnn;
-    #
-    # Returns
-    def ac
-      unless @data['AC']
-        @data['AC'] = fetch('AC').chomp(';')
-      end
-      @data['AC']
+  # AC  Accession number                   (1 per entry)
+  #
+  #  AC   PSnnnnn;
+  #
+  # Returns the fetched AC field with a trailing ';' removed (also available as #entry_id).
+  def ac
+    unless @data['AC']
+      @data['AC'] = fetch('AC').chomp(';')
     end
+    @data['AC']
+  end
 
-    alias entry_id ac
+  alias entry_id ac
 
 
-    # DT  Date                               (1 per entry)
-    #
-    #  DT   MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
-    #
-    # Returns
-    def dt
-      field_fetch('DT')
-    end
+  # DT  Date                               (1 per entry)
+  #
+  #  DT   MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
+  #
+  # Returns
+  def dt
+    field_fetch('DT')
+  end
 
-    alias date dt
+  alias date dt
 
 
-    # DE  Short description                  (1 per entry)
-    #
-    #  DE   Description.
-    #
-    # Returns
-    def de
-      field_fetch('DE')
-    end
+  # DE  Short description                  (1 per entry)
+  #
+  #  DE   Description.
+  #
+  # Returns
+  def de
+    field_fetch('DE')
+  end
 
-    alias definition de
+  alias definition de
 
 
-    # PA  Pattern                            (>=0 per entry)
-    #
-    #  see - pa2re method
-    #
-    # Returns
-    def pa
-      field_fetch('PA')
-      @data['PA'] = fetch('PA') unless @data['PA']
-      @data['PA'].gsub!(/\s+/, '') if @data['PA']
-      @data['PA']
-    end
+  # PA  Pattern                            (>=0 per entry)
+  #
+  #  see - pa2re method
+  #
+  # Returns the fetched PA field with all whitespace removed (also available as #pattern).
+  def pa
+    field_fetch('PA')
+    @data['PA'] = fetch('PA') unless @data['PA']
+    @data['PA'].gsub!(/\s+/, '') if @data['PA']
+    @data['PA']
+  end
 
-    alias pattern pa
+  alias pattern pa
 
 
-    # MA  Matrix/profile                     (>=0 per entry)
-    #
-    #  see - ma2re method
-    #
-    # Returns
-    def ma
-      field_fetch('MA')
-    end
+  # MA  Matrix/profile                     (>=0 per entry)
+  #
+  #  see - ma2re method
+  #
+  # Returns
+  def ma
+    field_fetch('MA')
+  end
 
-    alias profile ma
+  alias profile ma
 
 
-    # RU  Rule                               (>=0 per entry)
-    #
-    #  RU   Rule_Description.
-    #
-    #  The rule is described in ordinary English and is free-format.
-    #
-    # Returns
-    def ru
-      field_fetch('RU')
-    end
+  # RU  Rule                               (>=0 per entry)
+  #
+  #  RU   Rule_Description.
+  #
+  #  The rule is described in ordinary English and is free-format.
+  #
+  # Returns
+  def ru
+    field_fetch('RU')
+  end
 
-    alias rule ru
+  alias rule ru
 
 
-    # NR  Numerical results                  (>=0 per entry)
-    #
-    #   - SWISS-PROT scan statistics of true and false positives/negatives
-    #
-    # /RELEASE     SWISS-PROT release  number and  total  number  of  sequence
-    #              entries in that release.
-    # /TOTAL       Total number of hits in SWISS-PROT.
-    # /POSITIVE    Number of  hits on proteins that are known to belong to the
-    #              set in consideration.
-    # /UNKNOWN     Number of  hits on  proteins that  could possibly belong to
-    #              the set in consideration.
-    # /FALSE_POS   Number of false hits (on unrelated proteins).
-    # /FALSE_NEG   Number of known missed hits.
-    # /PARTIAL     Number of  partial sequences  which belong  to the  set  in
-    #              consideration, but  which  are  not  hit  by the pattern or
-    #              profile because they are partial (fragment) sequences.
-    #
-    # Returns
-    def nr
-      unless @data['NR']
-        hash = {}			# temporal hash
-        fetch('NR').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
-          if v =~ /^(\d+)\((\d+)\)$/
-            hits = $1.to_i		# the number of hits
-            seqs = $2.to_i		# the number of sequences
-            v = [hits, seqs]
-          elsif v =~ /([\d\.]+),(\d+)/
-            sprel = $1			# the number of SWISS-PROT release
-            spseq = $2.to_i		# the number of SWISS-PROT sequences
-            v = [sprel, spseq]
-          else
-            v = v.to_i
-          end
-          hash[k] = v
+  # NR  Numerical results                  (>=0 per entry)
+  #
+  #   - SWISS-PROT scan statistics of true and false positives/negatives
+  #
+  # /RELEASE     SWISS-PROT release  number and  total  number  of  sequence
+  #              entries in that release.
+  # /TOTAL       Total number of hits in SWISS-PROT.
+  # /POSITIVE    Number of  hits on proteins that are known to belong to the
+  #              set in consideration.
+  # /UNKNOWN     Number of  hits on  proteins that  could possibly belong to
+  #              the set in consideration.
+  # /FALSE_POS   Number of false hits (on unrelated proteins).
+  # /FALSE_NEG   Number of known missed hits.
+  # /PARTIAL     Number of  partial sequences  which belong  to the  set  in
+  #              consideration, but  which  are  not  hit  by the pattern or
+  #              profile because they are partial (fragment) sequences.
+  #
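+  # Returns a Hash of qualifier => value: [hits, seqs] for "n(m)" data, [release, seqs] for "r,n" data, else an Integer.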
+  def nr
+    unless @data['NR']
+      hash = {}			# temporal hash
+      fetch('NR').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
+        if v =~ /^(\d+)\((\d+)\)$/
+          hits = $1.to_i		# the number of hits
+          seqs = $2.to_i		# the number of sequences
+          v = [hits, seqs]
+        elsif v =~ /([\d\.]+),(\d+)/
+          sprel = $1			# the number of SWISS-PROT release
+          spseq = $2.to_i		# the number of SWISS-PROT sequences
+          v = [sprel, spseq]
+        else
+          v = v.to_i
         end
-        @data['NR'] = hash
+        hash[k] = v
       end
-      @data['NR']
+      @data['NR'] = hash
     end
+    @data['NR']
+  end
 
-    alias statistics nr
+  alias statistics nr
 
-    # Returns
-    def release
-      statistics['RELEASE']
-    end
+  # Returns
+  def release
+    statistics['RELEASE']
+  end
 
-    # Returns
-    def swissprot_release_number
-      release.first
-    end
+  # Returns
+  def swissprot_release_number
+    release.first
+  end
 
-    # Returns
-    def swissprot_release_sequences
-      release.last
-    end
+  # Returns
+  def swissprot_release_sequences
+    release.last
+  end
 
-    # Returns
-    def total
-      statistics['TOTAL']
-    end
+  # Returns
+  def total
+    statistics['TOTAL']
+  end
 
-    # Returns
-    def total_hits
-      total.first
-    end
+  # Returns
+  def total_hits
+    total.first
+  end
 
-    # Returns
-    def total_sequences
-      total.last
-    end
+  # Returns
+  def total_sequences
+    total.last
+  end
 
-    # Returns
-    def positive
-      statistics['POSITIVE']
-    end
+  # Returns
+  def positive
+    statistics['POSITIVE']
+  end
 
-    # Returns
-    def positive_hits
-      positive.first
-    end
+  # Returns
+  def positive_hits
+    positive.first
+  end
 
-    # Returns
-    def positive_sequences
-      positive.last
-    end
+  # Returns
+  def positive_sequences
+    positive.last
+  end
 
-    # Returns
-    def unknown
-      statistics['UNKNOWN']
-    end
+  # Returns
+  def unknown
+    statistics['UNKNOWN']
+  end
 
-    # Returns
-    def unknown_hits
-      unknown.first
-    end
+  # Returns
+  def unknown_hits
+    unknown.first
+  end
 
-    # Returns
-    def unknown_sequences
-      unknown.last
-    end
+  # Returns
+  def unknown_sequences
+    unknown.last
+  end
 
-    # Returns
-    def false_pos
-      statistics['FALSE_POS']
-    end
+  # Returns
+  def false_pos
+    statistics['FALSE_POS']
+  end
 
-    # Returns
-    def false_positive_hits
-      false_pos.first
-    end
+  # Returns
+  def false_positive_hits
+    false_pos.first
+  end
 
-    # Returns
-    def false_positive_sequences
-      false_pos.last
-    end
+  # Returns
+  def false_positive_sequences
+    false_pos.last
+  end
 
-    # Returns
-    def false_neg
-      statistics['FALSE_NEG']
-    end
-    alias false_negative_hits false_neg
+  # Returns
+  def false_neg
+    statistics['FALSE_NEG']
+  end
+  alias false_negative_hits false_neg
 
-    # Returns
-    def partial
-      statistics['PARTIAL']
-    end
+  # Returns
+  def partial
+    statistics['PARTIAL']
+  end
 
 
-    # CC  Comments                           (>=0 per entry)
-    #
-    #  CC   /QUALIFIER=data; /QUALIFIER=data; .......
-    #
-    # /TAXO-RANGE  Taxonomic range.
-    # /MAX-REPEAT  Maximum known  number of  repetitions of  the pattern  in a
-    #              single protein.
-    # /SITE        Indication of an `interesting' site in the pattern.
-    # /SKIP-FLAG   Indication of  an entry that can be, in some cases, ignored
-    #              by a program (because it is too unspecific).
-    #
-    # Returns
-    def cc
-      unless @data['CC']
-        hash = {}			# temporal hash
-        fetch('CC').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
-          hash[k] = v
-        end
-        @data['CC'] = hash
+  # CC  Comments                           (>=0 per entry)
+  #
+  #  CC   /QUALIFIER=data; /QUALIFIER=data; .......
+  #
+  # /TAXO-RANGE  Taxonomic range.
+  # /MAX-REPEAT  Maximum known  number of  repetitions of  the pattern  in a
+  #              single protein.
+  # /SITE        Indication of an `interesting' site in the pattern.
+  # /SKIP-FLAG   Indication of  an entry that can be, in some cases, ignored
+  #              by a program (because it is too unspecific).
+  #
+  # Returns a Hash mapping each qualifier name to its data string (also available as #comment).
+  def cc
+    unless @data['CC']
+      hash = {}			# temporal hash
+      fetch('CC').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
+        hash[k] = v
       end
-      @data['CC']
+      @data['CC'] = hash
     end
+    @data['CC']
+  end
 
-    alias comment cc
+  alias comment cc
 
-    # Returns
-    def taxon_range(expand = nil)
-      range = comment['TAXO-RANGE']
-      if range and expand
-        expand = []
-        range.scan(/./) do |x|
-          case x
-          when 'A'; expand.push('archaebacteria')
-          when 'B'; expand.push('bacteriophages')
-          when 'E'; expand.push('eukaryotes')
-          when 'P'; expand.push('prokaryotes')
-          when 'V'; expand.push('eukaryotic viruses')
-          end
+  # Returns the TAXO-RANGE string, or, if expand is truthy, an Array of group names for its A/B/E/P/V letters.
+  def taxon_range(expand = nil)
+    range = comment['TAXO-RANGE']
+    if range and expand
+      expand = []
+      range.scan(/./) do |x|
+        case x
+        when 'A'; expand.push('archaebacteria')
+        when 'B'; expand.push('bacteriophages')
+        when 'E'; expand.push('eukaryotes')
+        when 'P'; expand.push('prokaryotes')
+        when 'V'; expand.push('eukaryotic viruses')
         end
-        range = expand
       end
-      return range
+      range = expand
     end
+    return range
+  end
 
-    # Returns
-    def max_repeat
-      comment['MAX-REPEAT'].to_i
-    end
+  # Returns the MAX-REPEAT qualifier converted with to_i (0 when the qualifier is absent).
+  def max_repeat
+    comment['MAX-REPEAT'].to_i
+  end
 
-    # Returns
-    def site
-      if comment['SITE']
-        num, desc = comment['SITE'].split(',')
-      end
-      return [num.to_i, desc]
+  # Returns [number, description] split on ',' from the SITE qualifier, or [0, nil] when it is absent.
+  def site
+    if comment['SITE']
+      num, desc = comment['SITE'].split(',')
     end
+    return [num.to_i, desc]
+  end
 
-    # Returns
-    def skip_flag
-      if comment['SKIP-FLAG'] == 'TRUE'
-        return true
-      end
+  # Returns true when the SKIP-FLAG qualifier equals 'TRUE'; otherwise nil.
+  def skip_flag
+    if comment['SKIP-FLAG'] == 'TRUE'
+      return true
     end
+  end
 
 
-    # DR  Cross-references to SWISS-PROT     (>=0 per entry)
-    #
-    #  DR   AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
-    #
-    # - `AC_NB' is the SWISS-PROT primary accession number of the entry to
-    #   which reference is being made.
-    # - `ENTRY_NAME' is the SWISS-PROT entry name.
-    # - `C' is a one character flag that can be one of the following:
-    #
-    # T For a true positive.
-    # N For a false negative; a sequence which belongs to the set under
-    #   consideration, but which has not been picked up by the pattern or
-    #   profile.
-    # P For a `potential' hit; a sequence that belongs to the set under
-    #   consideration, but which was not picked up because the region(s) that
-    #   are used as a 'fingerprint' (pattern or profile) is not yet available
-    #   in the data bank (partial sequence).
-    # ? For an unknown; a sequence which possibly could belong to the set under
-    #   consideration.
-    # F For a false positive; a sequence which does not belong to the set in
-    #   consideration.
-    #
-    # Returns
-    def dr
-      unless @data['DR']
-        hash = {}			# temporal hash
-        if fetch('DR')
-          fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/).each do |a, e, c|
-            hash[a] = [e, c]	# SWISS-PROT : accession, entry, true/false
-          end
+  # DR  Cross-references to SWISS-PROT     (>=0 per entry)
+  #
+  #  DR   AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
+  #
+  # - `AC_NB' is the SWISS-PROT primary accession number of the entry to
+  #   which reference is being made.
+  # - `ENTRY_NAME' is the SWISS-PROT entry name.
+  # - `C' is a one character flag that can be one of the following:
+  #
+  # T For a true positive.
+  # N For a false negative; a sequence which belongs to the set under
+  #   consideration, but which has not been picked up by the pattern or
+  #   profile.
+  # P For a `potential' hit; a sequence that belongs to the set under
+  #   consideration, but which was not picked up because the region(s) that
+  #   are used as a 'fingerprint' (pattern or profile) is not yet available
+  #   in the data bank (partial sequence).
+  # ? For an unknown; a sequence which possibly could belong to the set under
+  #   consideration.
+  # F For a false positive; a sequence which does not belong to the set in
+  #   consideration.
+  #
+  # Returns a Hash mapping each accession to [entry_name, flag] (also available as #sp_xref).
+  def dr
+    unless @data['DR']
+      hash = {}			# temporal hash
+      if fetch('DR')
+        fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/).each do |a, e, c|
+          hash[a] = [e, c]	# SWISS-PROT : accession, entry, true/false
         end
-        @data['DR'] = hash
       end
-      @data['DR']
+      @data['DR'] = hash
     end
+    @data['DR']
+  end
 
-    alias sp_xref dr
+  alias sp_xref dr
 
-    # Returns
-    def list_xref(flag, by_name = nil)
-      ary = []
-      sp_xref.each do |sp_acc, value|
-        if value[1] == flag
-          if by_name
-            sp_name = value[0]
-            ary.push(sp_name)
-          else
-            ary.push(sp_acc)
-          end
+  # Returns an Array of accessions (entry names if by_name is truthy) whose DR flag equals the given flag.
+  def list_xref(flag, by_name = nil)
+    ary = []
+    sp_xref.each do |sp_acc, value|
+      if value[1] == flag
+        if by_name
+          sp_name = value[0]
+          ary.push(sp_name)
+        else
+          ary.push(sp_acc)
         end
       end
-      return ary
     end
+    return ary
+  end
 
-    # Returns
-    def list_truepositive(by_name = nil)
-      list_xref('T', by_name)
-    end
+  # Returns the accessions (entry names if by_name is truthy) flagged 'T', i.e. true positives.
+  def list_truepositive(by_name = nil)
+    list_xref('T', by_name)
+  end
 
-    # Returns
-    def list_falsenegative(by_name = nil)
-      list_xref('F', by_name)
-    end
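+  # Returns entries flagged 'F'. NOTE(review): the DR legend gives 'N' for false negative, 'F' for false positive - confirm.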
+  def list_falsenegative(by_name = nil)
+    list_xref('F', by_name)
+  end
 
-    # Returns
-    def list_falsepositive(by_name = nil)
-      list_xref('P', by_name)
-    end
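+  # Returns entries flagged 'P'. NOTE(review): the DR legend gives 'F' for false positive, 'P' for potential hit - confirm.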
+  def list_falsepositive(by_name = nil)
+    list_xref('P', by_name)
+  end
 
-    # Returns
-    def list_potentialhit(by_name = nil)
-      list_xref('P', by_name)
-    end
+  # Returns the accessions (entry names if by_name is truthy) flagged 'P', i.e. potential hits.
+  def list_potentialhit(by_name = nil)
+    list_xref('P', by_name)
+  end
 
-    # Returns
-    def list_unknown(by_name = nil)
-      list_xref('?', by_name)
-    end
+  # Returns the accessions (entry names if by_name is truthy) flagged '?', i.e. unknowns.
+  def list_unknown(by_name = nil)
+    list_xref('?', by_name)
+  end
 
 
-    # 3D  Cross-references to PDB            (>=0 per entry)
-    #
-    #  3D   name; [name2;...]
-    #
-    # Returns
-    def pdb_xref
-      unless @data['3D']
-        @data['3D'] = fetch('3D').split(/; */)
-      end
-      @data['3D']
+  # 3D  Cross-references to PDB            (>=0 per entry)
+  #
+  #  3D   name; [name2;...]
+  #
+  # Returns an Array of the names in the fetched 3D field, split on ';' and any spaces that follow it.
+  def pdb_xref
+    unless @data['3D']
+      @data['3D'] = fetch('3D').split(/; */)
     end
+    @data['3D']
+  end
 
 
-    # DO  Pointer to the documentation file  (1 per entry)
-    #
-    #  DO   PDOCnnnnn;
-    #
-    # Returns
-    def pdoc_xref
-      @data['DO'] = fetch('DO').chomp(';')
-    end
+  # DO  Pointer to the documentation file  (1 per entry)
+  #
+  #  DO   PDOCnnnnn;
+  #
+  # Returns the fetched DO field with a trailing ';' removed (fetched again on every call).
+  def pdoc_xref
+    @data['DO'] = fetch('DO').chomp(';')
+  end
 
 
-    ### prosite pattern to regular expression
-    #
-    # prosite/prosuser.txt:
-    #
-    # The PA (PAttern) lines contains the definition of a PROSITE pattern. The
-    # patterns are described using the following conventions:
-    #
-    # 0) The standard IUPAC one-letter codes for the amino acids are used.
-    # 0) Ambiguities are indicated by listing the acceptable amino acids for a
-    #   given position, between square parentheses `[ ]'. For example: [ALT]
-    #   stands for Ala or Leu or Thr.
-    # 1) A period ends the pattern.
-    # 2) When a pattern is restricted to either the N- or C-terminal of a
-    #   sequence, that pattern either starts with a `<' symbol or respectively
-    #   ends with a `>' symbol.
-    # 3) Ambiguities are also indicated by listing between a pair of curly
-    #   brackets `{ }' the amino acids that are not accepted at a given
-    #   position. For example: {AM} stands for any amino acid except Ala and
-    #   Met.
-    # 4) Repetition of an element of the pattern can be indicated by following
-    #   that element with a numerical value or a numerical range between
-    #   parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
-    #   x-x or x-x-x or x-x-x-x.
-    # 5) The symbol `x' is used for a position where any amino acid is accepted.
-    # 6) Each element in a pattern is separated from its neighbor by a `-'.
-    #
-    # Examples:
-    #
-    # PA  [AC]-x-V-x(4)-{ED}.
-    #
-    # This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
-    # but Glu or Asp}
-    #
-    # PA  <A-x-[ST](2)-x(0,1)-V.
-    #
-    # This pattern, which must be in the N-terminal of the sequence (`<'), is
-    # translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
-    #
-    def self.pa2re(pattern)
-      pattern.gsub!(/\s/, '')	# remove white spaces
-      pattern.sub!(/\.$/, '')	# (1) remove trailing '.'
-      pattern.sub!(/^</, '^')	# (2) restricted to the N-terminal : `<'
-      pattern.sub!(/>$/, '$')	# (2) restricted to the C-terminal : `>'
-      pattern.gsub!(/\{(\w+)\}/) { |m|
-        '[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
-      }
-      pattern.gsub!(/\(([\d,]+)\)/) { |m|
-        '{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
-      }
-      pattern.tr!('x', '.')	# (5) any amino acid is accepted : 'x'
-      pattern.tr!('-', '')	# (6) each element is separated by a '-'
-      Regexp.new(pattern)
-    end
+  ### prosite pattern to regular expression (NOTE: pa2re edits the given pattern string in place)
+  #
+  # prosite/prosuser.txt:
+  #
+  # The PA (PAttern) lines contain the definition of a PROSITE pattern. The
+  # patterns are described using the following conventions:
+  #
+  # 0) The standard IUPAC one-letter codes for the amino acids are used.
+  # 0) Ambiguities are indicated by listing the acceptable amino acids for a
+  #   given position, between square parentheses `[ ]'. For example: [ALT]
+  #   stands for Ala or Leu or Thr.
+  # 1) A period ends the pattern.
+  # 2) When a pattern is restricted to either the N- or C-terminal of a
+  #   sequence, that pattern either starts with a `<' symbol or respectively
+  #   ends with a `>' symbol.
+  # 3) Ambiguities are also indicated by listing between a pair of curly
+  #   brackets `{ }' the amino acids that are not accepted at a given
+  #   position. For example: {AM} stands for any amino acid except Ala and
+  #   Met.
+  # 4) Repetition of an element of the pattern can be indicated by following
+  #   that element with a numerical value or a numerical range between
+  #   parentheses. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
+  #   x-x or x-x-x or x-x-x-x.
+  # 5) The symbol `x' is used for a position where any amino acid is accepted.
+  # 6) Each element in a pattern is separated from its neighbor by a `-'.
+  #
+  # Examples:
+  #
+  # PA  [AC]-x-V-x(4)-{ED}.
+  #
+  # This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
+  # but Glu or Asp}
+  #
+  # PA  <A-x-[ST](2)-x(0,1)-V.
+  #
+  # This pattern, which must be in the N-terminal of the sequence (`<'), is
+  # translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
+  #
+  def self.pa2re(pattern)
+    pattern.gsub!(/\s/, '')	# remove white spaces
+    pattern.sub!(/\.$/, '')	# (1) remove trailing '.'
+    pattern.sub!(/^</, '^')	# (2) restricted to the N-terminal : `<'
+    pattern.sub!(/>$/, '$')	# (2) restricted to the C-terminal : `>'
+    pattern.gsub!(/\{(\w+)\}/) { |m|
+      '[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
+    }
+    pattern.gsub!(/\(([\d,]+)\)/) { |m|
+      '{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
+    }
+    pattern.tr!('x', '.')	# (5) any amino acid is accepted : 'x'
+    pattern.tr!('-', '')	# (6) each element is separated by a '-'
+    Regexp.new(pattern, Regexp::IGNORECASE)
+  end
 
-    def pa2re(pattern)
-      self.class.pa2re(pattern)
-    end
+  def pa2re(pattern)
+    self.class.pa2re(pattern)
+  end
 
+  def re
+    self.class.pa2re(self.pa)
+  end
 
-    ### prosite profile to regular expression
-    #
-    # prosite/profile.txt:
-    #
-    # Returns
-    def ma2re(matrix)
-      raise NotImplementedError
-    end
 
+  ### prosite profile to regular expression
+  #
+  # prosite/profile.txt:
+  #
+  # Not implemented: always raises NotImplementedError.
+  def ma2re(matrix)
+    raise NotImplementedError
   end
 
-end
+end # PROSITE
+
+end # Bio
 
 
 if __FILE__ == $0
 
   begin