require 'bio'
require 'fileutils'

require 'neurohmmer/arg_validators'
require 'neurohmmer/hmmer'
require 'neurohmmer/output'

# Top level module / namespace.
module Neurohmmer
  # NOTE(review): this copy of the file is damaged between the `class <`
  # keyword and the `private` marker below. The header is presumably
  # `class << self` (the helpers below use module-level instance variables
  # and the file closes with exactly two `end`s) -- confirm against VCS.
  class << self
    # FIXME(review): the public methods that lived here were lost. The only
    # surviving text (the tail of one method) is preserved verbatim below;
    # restore the original definitions from version control.
    #
    #   <([^\n]*)\n([A-Za-z\n\*]*)/)[0] end

    private

    # Prepares the input for analysis:
    # * creates the temporary directory (@opt[:temp_dir]),
    # * when @opt[:type] is :genetic, replaces @opt[:input_file] with the path
    #   of its six-frame translation,
    # * stores the index built by index_input_file in @input_index.
    def init_input
      FileUtils.mkdir_p(@opt[:temp_dir])
      @opt[:input_file] = translate_input if @opt[:type] == :genetic
      @input_index = index_input_file
    end

    # Translates the input data in all 6 frames.
    #
    # Every entry of +input+ is written six times to
    # '<temp_dir>/input.translated.fa', once per frame, with '-frame:<n>'
    # appended to its definition line.
    #
    # Returns the path of the translated file.
    def translate_input(input = @opt[:input_file])
      translated_file = File.join(@opt[:temp_dir], 'input.translated.fa')
      File.open(translated_file, 'w') do |file|
        # Block form: the reader's file handle is closed when the block exits
        # (the non-block form left it open until garbage collection).
        Bio::FlatFile.open(Bio::FastaFormat, input) do |fasta|
          fasta.each_entry do |entry|
            (1..6).each do |f|
              file.puts ">#{entry.definition}-frame:#{f}"
              file.puts entry.naseq.translate(f)
            end
          end
        end
      end
      translated_file
    end

    # Indexes the input file - returns a hash in the following format:
    # {seq id: [start byte in file, end byte in file] }
    def index_input_file
      c = IO.binread(@opt[:input_file])
      keys = []
      values = []
      # Take the header text and its offset from the SAME match so the two
      # lists cannot drift apart. (Previously offsets were found with a second
      # pattern that split on every '>', so a header containing '>' produced
      # an extra offset and shifted all following entries.)
      c.scan(/>(.*)\n/) do
        match = Regexp.last_match
        keys << match[1]
        # binread returns a binary string, so this is a byte offset.
        values << match.begin(0)
      end
      index(c, keys, values)
    end

    # A method run from index_input_file that creates a simple hash with the
    # {seq id: [start byte in file, end byte in file] }
    #
    # content - the full text of the input file
    # keys    - the header lines (without the leading '>'), in file order
    # values  - the offset of each header's '>', in the same order as keys
    #
    # The seq id is the first 116 characters of the header with all whitespace
    # removed. The end offset is the start offset of the next record, except
    # for the last record where it is content.length - 1.
    # NOTE(review): those two end conventions differ by one (start of next
    # record vs. index of last byte) -- confirm the reader expects this.
    def index(content, keys, values)
      fasta_index = {}
      keys.each_with_index do |k, i|
        id = k[0..115].gsub(/\s+/, '')
        endf = (i == values.length - 1) ? content.length - 1 : values[i + 1]
        fasta_index[id] = [values[i], endf]
      end
      fasta_index
    end

    # Deletes the temporary directory (@opt[:temp_dir]) and everything in it;
    # does nothing when the directory does not exist.
    def remove_temp_dir
      return unless File.directory?(@opt[:temp_dir])
      FileUtils.rm_rf(@opt[:temp_dir])
    end
  end
end