# Copyright 2014 Ryan Moore
# Contact: moorer@udel.edu
#
# This file is part of parse_fasta.
#
# parse_fasta is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# parse_fasta is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with parse_fasta.  If not, see <http://www.gnu.org/licenses/>.

# Provides simple interface for parsing fasta format files.
class FastaFile < File
  # A new record starts wherever a line begins with '>', so the file is
  # read in chunks terminated by a newline followed by '>'.
  RECORD_SEPARATOR = "\n>".freeze
  private_constant :RECORD_SEPARATOR

  # Analogous to File#each_line, #each_record is used to go through a
  # fasta file record by record.
  #
  # @param separate_lines [Object] If truthy, separate lines of record
  #   into an array, but if falsy, yield a Sequence object for the
  #   sequence instead.
  #
  # @example Parsing a fasta file (default behavior)
  #   FastaFile.open('reads.fna', 'r') do |f|
  #     f.each_record do |header, sequence|
  #       puts [header, sequence.gc].join("\t")
  #     end
  #   end
  #
  # @example Parsing a fasta file (with truthy value param)
  #   FastaFile.open('reads.fna', 'r') do |f|
  #     f.each_record(1) do |header, sequence|
  #       # header => 'sequence_1'
  #       # sequence => ['AACTG', 'AGTCGT', ... ]
  #     end
  #   end
  #
  # @yield The header and sequence for each record in the fasta
  #   file to the block
  #
  # @yieldparam header [String] The header of the fasta record without
  #   the leading '>' and with surrounding whitespace stripped
  #
  # @yieldparam sequence [Sequence, Array<String>] The sequence of the
  #   fasta record. If `separate_lines` is falsy (the default
  #   behavior), will be Sequence, but if truthy will be
  #   Array<String>. A record with a header but no sequence yields an
  #   empty sequence / empty array.
  #
  # @return [FastaFile, Enumerator] self when a block is given,
  #   otherwise an Enumerator over the [header, sequence] pairs
  def each_record(separate_lines = nil)
    return enum_for(:each_record, separate_lines) unless block_given?

    each(RECORD_SEPARATOR) do |chunk|
      header, sequence =
        if separate_lines
          parse_line_separately(chunk)
        else
          name, residues = parse_line(chunk)
          # Sequence is not defined in this file; it is expected to be
          # provided elsewhere in the project.
          [name, Sequence.new(residues)]
        end

      yield(header.strip, sequence)
    end
  end

  private

  # Splits one raw record chunk into its header and its sequence joined
  # into a single string with all line terminators removed.
  #
  # @param line [String] a chunk as read by #each_record
  # @return [Array(String, String)] header and joined sequence
  def parse_line(line)
    header, body = split_record(line)
    # String#delete treats its argument as a character set, so this
    # drops every "\r" and "\n" (handles both LF and CRLF files).
    [header, body.delete("\r\n")]
  end

  # Splits one raw record chunk into its header and an array holding
  # each non-empty sequence line.
  #
  # @param line [String] a chunk as read by #each_record
  # @return [Array(String, Array<String>)] header and sequence lines
  def parse_line_separately(line)
    header, body = split_record(line)
    [header, body.each_line.map(&:chomp).reject(&:empty?)]
  end

  # Removes the record markers from a chunk and separates the header
  # line from the remaining sequence text.
  #
  # Only the trailing record separator and a single leading '>' are
  # removed, so a '>' occurring inside the header text is preserved.
  # Missing parts come back as empty strings rather than nil, which keeps
  # header-only records from raising.
  #
  # @param chunk [String] a chunk as read by #each_record
  # @return [Array(String, String)] header line and raw sequence text
  def split_record(chunk)
    record = chunk.chomp(RECORD_SEPARATOR).sub(/\A>/, '')
    header, body = record.split("\n", 2)
    [header.to_s, body.to_s]
  end
end