#require "bio" #require "blast_seq_extract/version" require "bio/blast_seq_extract/version" ##################################################################################3 class BlastSeqExtract #class Error < StandardError; end attr_accessor :subject_regions, :seqObjs def initialize @subject_regions = Hash.new @seqObjs = Hash.new end def readBlast(infile, evalue_cutoff=1, max_no_seqs=1) in_fh = File.open(infile, 'r') in_fh.each_line do |line| #query subject 98.391 373 6 0 660 1032 373 1 0.0 656 line.chomp! line_arr = line.split("\t") query, subject = line_arr[0, 2] q_start, q_end, s_start, s_end = line_arr.values_at(6,7,8,9).map(&:to_i) evalue = line_arr[10].to_f if evalue <= evalue_cutoff @subject_regions[subject] = [s_start, s_end, evalue] break if @subject_regions.size == max_no_seqs end end in_fh.close end def readSeq(infile) in_fh = Bio::FlatFile.open(infile, 'r') in_fh.each_entry do |f| title = f.definition.split(' ')[0] @seqObjs[title] = f end in_fh.close end def getRegion() @gs = Array.new @subject_regions.each_pair do |subject, arr| s_start, s_end, evalue = arr seq = @seqObjs[subject].seq if s_start > s_end seq = Bio::Sequence.auto(seq).reverse_complement s_start, s_end = s_end, s_start end definition = [subject, arr[0,2].join('-')].flatten.join('_') seq = seq[s_start, s_end-s_start].upcase g = Bio::FastaFormat.new('>'+definition+"\n"+seq) @gs << g end return(@gs) end end