#
# bio/util/restriction_enzyme.rb - Digests DNA based on restriction enzyme cut patterns
#
# Author::    Trevor Wennblom
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#
#  $Id: restriction_enzyme.rb,v 1.16 2007/07/16 19:28:48 k Exp $
#

module Bio #:nodoc:

  autoload :REBASE, 'bio/db/rebase'

  # = Description
  #
  # Bio::RestrictionEnzyme allows you to fragment a DNA strand using one
  # or more restriction enzymes.  Bio::RestrictionEnzyme is aware that
  # multiple enzymes may be competing for the same recognition site and
  # returns the various possible fragmentation patterns that result in
  # such circumstances.
  #
  # When using Bio::RestrictionEnzyme you may simply use the name of common
  # enzymes to cut your sequence or you may construct your own unique enzymes
  # to use.
  #
  # Visit the documentation for individual classes for more information.
  #
  # An examination of the unit tests will also reveal several interesting uses
  # for the curious programmer.
  #
  # = Usage
  #
  # == Basic
  #
  # EcoRI cut pattern:
  #   G|A A T T C
  #    +-------+
  #   C T T A A|G
  #
  # This can also be written as:
  #   G^AATTC
  #
  # Note that to use the method +cut_with_enzyme+ from a Bio::Sequence object
  # you currently must +require+ +bio/util/restriction_enzyme+ directly.  If
  # instead you're going to directly call Bio::RestrictionEnzyme::Analysis
  # then only +bio+ needs to be +required+.
  #
  #   require 'bio'
  #   require 'bio/util/restriction_enzyme'
  #
  #   seq = Bio::Sequence::NA.new('gaattc')
  #   cuts = seq.cut_with_enzyme('EcoRI')
  #   cuts.primary                        # => ["aattc", "g"]
  #   cuts.complement                     # => ["cttaa", "g"]
  #   cuts.inspect                        # => "[#<struct Bio::RestrictionEnzyme::Fragment ...>, #<struct Bio::RestrictionEnzyme::Fragment ...>]"
  #
  #   seq = Bio::Sequence::NA.new('gaattc')
  #   cuts = seq.cut_with_enzyme('g^aattc')
  #   cuts.primary                        # => ["aattc", "g"]
  #   cuts.complement                     # => ["cttaa", "g"]
  #
  #   seq = Bio::Sequence::NA.new('gaattc')
  #   cuts = seq.cut_with_enzyme('g^aattc', 'gaatt^c')
  #   cuts.primary                        # => ["aattc", "c", "g", "gaatt"]
  #   cuts.complement                     # => ["c", "cttaa", "g", "ttaag"]
  #
  #   seq = Bio::Sequence::NA.new('gaattcgaattc')
  #   cuts = seq.cut_with_enzyme('EcoRI')
  #   cuts.primary                        # => ["aattc", "aattcg", "g"]
  #   cuts.complement                     # => ["cttaa", "g", "gcttaa"]
  #
  #   seq = Bio::Sequence::NA.new('gaattcgggaattc')
  #   cuts = seq.cut_with_enzyme('EcoRI')
  #   cuts.primary                        # => ["aattc", "aattcggg", "g"]
  #   cuts.complement                     # => ["cttaa", "g", "gcccttaa"]
  #
  #   cuts[0].inspect                     # => "#<struct Bio::RestrictionEnzyme::Fragment ...>"
  #
  #   cuts[0].primary                     # => "g    "
  #   cuts[0].complement                  # => "cttaa"
  #
  #   cuts[1].primary                     # => "aattcggg    "
  #   cuts[1].complement                  # => "    gcccttaa"
  #
  #   cuts[2].primary                     # => "aattc"
  #   cuts[2].complement                  # => "    g"
  #
  # == Advanced
  #
  #   require 'bio'
  #
  #   enzyme_1 = Bio::RestrictionEnzyme.new('anna', [1,1], [3,3])
  #   enzyme_2 = Bio::RestrictionEnzyme.new('gg', [1,1])
  #   a = Bio::RestrictionEnzyme::Analysis.cut('agga', enzyme_1, enzyme_2)
  #   a.primary                           # => ["a", "ag", "g", "ga"]
  #   a.complement                        # => ["c", "ct", "t", "tc"]
  #
  #   a[0].primary                        # => "ag"
  #   a[0].complement                     # => "tc"
  #
  #   a[1].primary                        # => "ga"
  #   a[1].complement                     # => "ct"
  #
  #   a[2].primary                        # => "a"
  #   a[2].complement                     # => "t"
  #
  #   a[3].primary                        # => "g"
  #   a[3].complement                     # => "c"
  #
  # = Todo / under development
  #
  # * Circular DNA cutting
  #
  class RestrictionEnzyme
    #require 'bio/util/restriction_enzyme/cut_symbol'

    # Supporting classes and modules are registered for autoload, so each
    # file is only read when its constant is first referenced.
    autoload :CutSymbol,              'bio/util/restriction_enzyme/cut_symbol'
    autoload :StringFormatting,       'bio/util/restriction_enzyme/string_formatting'
    autoload :SingleStrand,           'bio/util/restriction_enzyme/single_strand'
    autoload :SingleStrandComplement, 'bio/util/restriction_enzyme/single_strand_complement'
    autoload :DoubleStranded,         'bio/util/restriction_enzyme/double_stranded'
    autoload :Analysis,               'bio/util/restriction_enzyme/analysis'
    autoload :Range,                  'bio/util/restriction_enzyme/range/sequence_range'

    # CutSymbol is mixed in at both the instance and the class level.
    include CutSymbol
    extend CutSymbol

    # See Bio::RestrictionEnzyme::DoubleStranded.new for more information.
    #
    # ---
    # *Arguments*
    # * +users_enzyme_or_rebase_or_pattern+: One of three possible parameters:  The name of an enzyme, a REBASE::EnzymeEntry object, or a nucleotide pattern with a cut mark.
    # * +cut_locations+: The cut locations in enzyme index notation.
    # *Returns*:: Bio::RestrictionEnzyme::DoubleStranded
    #--
    # Factory for DoubleStranded: this overrides +new+ so that calling
    # Bio::RestrictionEnzyme.new hands back whatever DoubleStranded.new builds.
    #++
    def self.new(users_enzyme_or_rebase_or_pattern, *cut_locations)
      DoubleStranded.new(users_enzyme_or_rebase_or_pattern, *cut_locations)
    end

    # REBASE enzyme data information
    #
    # Returns a Bio::REBASE object loaded with all of the enzyme data on file.
    # The data is read from +restriction_enzyme/enzymes.yaml+, located relative
    # to this source file, on the first call; the resulting object is cached
    # in a class variable and handed back on later calls.
    #
    # ---
    # *Arguments*
    # * _none_
    # *Returns*:: Bio::REBASE
    def self.rebase
      # The path is only built when the cache is empty, so repeated calls do
      # not redo the file-system path expansion.
      @@rebase_enzymes ||= begin
        base_directory = File.dirname(File.expand_path(__FILE__))
        enzymes_yaml_file = File.join(base_directory, 'restriction_enzyme', 'enzymes.yaml')
        Bio::REBASE.load_yaml(enzymes_yaml_file)
      end
    end

    # Check if supplied name is the name of an available enzyme
    #
    # See Bio::REBASE.enzyme_name?
    #
    # ---
    # *Arguments*
    # * +name+: Enzyme name
    # *Returns*:: +true+ _or_ +false+
    def self.enzyme_name?(name)
      rebase.enzyme_name?(name)
    end

    # Convenience wrapper that forwards to Bio::RestrictionEnzyme::Analysis.cut.
    #
    # See Bio::RestrictionEnzyme::Analysis.cut
    #
    # ---
    # *Arguments*
    # * +sequence+: The sequence to cut; passed through unchanged.
    # * +enzymes+: The first (required) enzyme argument; passed through unchanged.
    # * +additional_enzymes+: (_optional_) Any further enzyme arguments, forwarded in order after +enzymes+.
    # *Returns*:: Whatever Bio::RestrictionEnzyme::Analysis.cut returns
    def self.cut(sequence, enzymes, *additional_enzymes)
      Bio::RestrictionEnzyme::Analysis.cut(sequence, enzymes, *additional_enzymes)
    end

    # A Bio::RestrictionEnzyme::Fragment is a DNA fragment composed of fused primary and
    # complementary strands that would be found floating in solution after a full
    # sequence is digested by one or more RestrictionEnzymes.
    #
    # You will notice that either the primary or complement strand will be
    # padded with spaces to make them line up according to the original DNA
    # configuration before they were cut.
    #
    # Example:
    #
    # Fragment 1:
    #   primary =    "attaca"
    #   complement = "  atga"
    #
    # Fragment 2:
    #   primary =    "g  "
    #   complement = "cta"
    #
    # View these with the +primary+ and +complement+ methods.
    #
    # Bio::RestrictionEnzyme::Fragment is a simple +Struct+ object.  Besides
    # +primary+ and +complement+ it carries the members +p_left+, +p_right+,
    # +c_left+ and +c_right+.
    # NOTE(review): those four presumably hold the left/right boundaries of
    # the primary and complement strands -- confirm against the code that
    # builds Fragment objects.
    #
    # Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragment
    Fragment = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right)

    # Bio::RestrictionEnzyme::Fragments inherits from +Array+.
    #
    # Bio::RestrictionEnzyme::Fragments is a container for Fragment objects.  It adds the
    # methods +primary+ and +complement+ which return an +Array+ of all
    # respective strands from its Fragment members in alphabetically sorted
    # order.  The returned strings do not contain the spacing/padding that you
    # would find by accessing the members directly.
    #
    # NOTE(review): earlier documentation stated that duplicate items are not
    # returned.  No de-duplication is performed in this class, so any
    # uniqueness has to come from how the container is populated -- confirm
    # against the code that fills it.
    #
    # Example:
    #
    #   primary = ['attaca', 'g']
    #   complement = ['atga', 'cta']
    #
    # Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments
    class Fragments < Array
      # Sorted, padding-free primary strands of every member.
      def primary
        strip_and_sort(:primary)
      end

      # Sorted, padding-free complement strands of every member.
      def complement
        strip_and_sort(:complement)
      end

      protected

      # Calls +sym_strand+ on every member, removes all space characters from
      # each result and returns the resulting strings as a sorted plain Array.
      #
      # ---
      # *Arguments*
      # * +sym_strand+: Name of the member method to call (+:primary+ or +:complement+)
      # *Returns*:: +Array+ of +String+
      def strip_and_sort(sym_strand)
        map { |fragment| fragment.send(sym_strand).delete(' ') }.sort
      end
    end
  end # RestrictionEnzyme
end # Bio