Sha256: 9fa29ed857c3a94ed869c2ecb68443b7b36a77f72a846ed056065dac9320f40f

Contents?: true

Size: 1.6 KB

Versions: 2

Compression:

Stored size: 1.6 KB

Contents

#require "bio"
#require "blast_seq_extract/version"
require "bio/blast_seq_extract/version"


##################################################################################3
class BlastSeqExtract
  #class Error < StandardError; end
  attr_accessor :subject_regions, :seqObjs
  
  def initialize
    @subject_regions = Hash.new
    @seqObjs = Hash.new
  end

  def readBlast(infile, evalue_cutoff=1, max_no_seqs=1)
    in_fh = File.open(infile, 'r')
    in_fh.each_line do |line|
      #query  subject 98.391  373 6 0 660 1032  373 1 0.0 656
      line.chomp!
      line_arr = line.split("\t")
      query, subject = line_arr[0, 2]
      q_start, q_end, s_start, s_end = line_arr.values_at(6,7,8,9).map(&:to_i)
      evalue = line_arr[10].to_f
      if evalue <= evalue_cutoff
        @subject_regions[subject] = [s_start, s_end, evalue]
        break if @subject_regions.size == max_no_seqs
      end
    end
    in_fh.close
  end

  def readSeq(infile)
    in_fh = Bio::FlatFile.open(infile, 'r')
    in_fh.each_entry do |f|
      title = f.definition.split(' ')[0]
      @seqObjs[title] = f
    end
    in_fh.close
  end

  def getRegion()
    @gs = Array.new
    @subject_regions.each_pair do |subject, arr|
      s_start, s_end, evalue = arr
      seq = @seqObjs[subject].seq
      if s_start > s_end
        seq = Bio::Sequence.auto(seq).reverse_complement
        s_start, s_end = s_end, s_start
      end
      definition = [subject, arr[0,2].join('-')].flatten.join('_')
      seq = seq[s_start, s_end-s_start].upcase
      g = Bio::FastaFormat.new('>'+definition+"\n"+seq)
      @gs << g
    end
    return(@gs)
  end
end


Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
bio-blast_seq_extract-0.2.5 lib/bio/blast_seq_extract.rb
bio-blast_seq_extract-0.2.4 lib/bio/blast_seq_extract.rb