Sha256: 0480a3c8cc9a13c8883a24a1299da1cc45cd20e0b2c0999875be25ddee053016
Contents?: true
Size: 1.94 KB
Versions: 1
Compression:
Stored size: 1.94 KB
Contents
require 'bio/io/flatfile'
require 'bio/db/fasta'
require 'stringio'

# Reopened so that Enumerable's methods (map, select, partition, ...) can be
# called directly on a Bio::FlatFile object.
class Bio::FlatFile
  include Enumerable
end

# Reopened to give fasta entries two additional accessor names:
#
#     entry.header   == entry.definition
#     entry.sequence == entry.seq
class Bio::FastaFormat
  alias header definition
  alias sequence seq
end

module Mspire
  # Convenience helpers for working with fasta formatted sequence databases.
  # Loading this file also mixes Enumerable into Bio::FlatFile, which allows
  # code such as:
  #
  #     accessions = Mspire::Fasta.open("file.fasta") do |fasta|
  #       fasta.map(&:accession)
  #     end
  #
  # Mspire::Fasta.new accepts either an IO object or a String holding the
  # fasta formatted data itself.
  #
  #     # from an io object:
  #     File.open("file.fasta") do |io|
  #       fasta = Mspire::Fasta.new(io)
  #       ... do something with it
  #     end
  #
  #     # from a string:
  #     string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
  #     fasta = Mspire::Fasta.new(string)
  #     (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
  module Fasta
    # Opens +file+ as a fasta flat file, forwarding the block (if any) to
    # Bio::FlatFile.open, which yields the Bio::FlatFile object.
    def self.open(file, &block)
      Bio::FlatFile.open(Bio::FastaFormat, file, &block)
    end

    # Yields each Bio::FastaFormat entry of +file+ in turn. Without a block,
    # an Enumerator over the same entries is returned instead.
    def self.foreach(file, &block)
      return enum_for(:foreach, file) unless block

      Bio::FlatFile.open(Bio::FastaFormat, file) do |entries|
        entries.each(&block)
      end
    end

    # Builds a Bio::FlatFile reader from +io+, which is either an IO object
    # or a String containing the fasta data itself.
    def self.new(io)
      # A String is treated as the data, not as a path, so wrap it in an IO.
      source = io.is_a?(String) ? StringIO.new(io) : io
      Bio::FlatFile.new(Bio::FastaFormat, source)
    end

    # Extracts the uniprot id, i.e. the text between the first and second
    # pipe characters of +header+.
    #
    #     'sp|Q04917|1433F_HUMAN' #=> 'Q04917'
    def self.uniprot_id(header)
      header[/^[^\|]+\|([^\|]+)\|/, 1]
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
mspire-0.8.5 | lib/mspire/fasta.rb |