$: << File.expand_path(File.join(File.dirname(__FILE__)))

require 'scbi_mapreduce'
require 'scbi_blast'
require 'json'
require 'sequence'
require 'fl_string_utils'
require "lcs" # like the similar class in seqtrim; returns the longest common sequence
require "test_code"
require 'fl_analysis'
include FlAnalysis
require 'nc_rna'
include NcRna

# Map-reduce worker that annotates a batch of sequences.
#
# For every batch received in #process_object it runs, in order:
#   1. blastx against the optional user database (@options[:user_db]),
#   2. blastx against the "sp_<tax_group>" database,
#   3. blastx against the "tr_<tax_group>" database,
#   4. TestCode on the sequences carrying an :apply_tcode annotation,
#   5. blastn against the ncRNA database for the sequences carrying a
#      :tcode_unknown annotation.
# The blast hits are handed to analiza_orf_y_fl / find_nc_rna, which are
# defined outside this file (presumably in FlAnalysis / NcRna -- confirm).
class MyWorker < ScbiMapreduce::Worker
  # E-value thresholds handed to blast (only ever interpolated into the
  # option string built by #run_blast).
  BLASTX_EVALUE = '1e-6'.freeze
  BLASTN_EVALUE = '1e-3'.freeze

  # Hook called when the worker starts. Nothing is done here at the moment;
  # the rescue only reports an error raised by code added to the body later.
  def starting_worker
    # $WORKER_LOG.info "Loading actions"
  rescue StandardError => e
    # StandardError (not Exception) so SystemExit and signals are not swallowed.
    puts(e.message + e.backtrace.join("\n"))
  end

  # Stores the parameters sent by the manager; they are read later through
  # @options by #full_lenghter2.
  #
  # obj - the options object (indexed with symbol keys such as :user_db and
  #       :tax_group elsewhere in this class).
  def receive_initial_config(obj)
    # Reads the parameters
    # $WORKER_LOG.info "Params received: #{obj.to_json}"
    @options = obj
  end

  # Processes one batch of sequences and returns the same object, which
  # has been annotated in place by #full_lenghter2.
  def process_object(obj)
    full_lenghter2(obj)
    obj
  end

  # Hook called when the worker finishes; intentionally empty.
  def closing_worker
  end

  # Runs blast on the given sequences and returns the parsed result.
  #
  # input      - the sequences to be blasted.
  # database   - path of the blast database (passed as "-db <database>").
  # blast_type - blast program to use (e.g. 'blastx', 'blastn').
  # evalue     - e-value threshold (passed as "-evalue <evalue>").
  #
  # Only the best target per query is requested (-max_target_seqs 1) and
  # the result is requested in XML format.
  def run_blast(input, database, blast_type, evalue)
    blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} -max_target_seqs 1")
    blast.do_blast_seqs(input, :xml)
  end

  # Runs the whole annotation pipeline over seqs (see the class comment).
  #
  # seqs - the batch of sequences; each one must respond to get_annotations.
  #
  # Returns the sequences that were sent to the ncRNA search (the value of
  # the final each_with_index), as the original implementation did.
  # Prints a message and exits the process if the user database is missing.
  def full_lenghter2(seqs)
    # -------------------------------------------- User database
    # if the user has included his own database in the parameters entry,
    # the location of the database is tested, and blast and the results analysis is done
    if @options[:user_db]
      # Strip the directory part. String#sub returns the string unchanged
      # when there is no '/', so a bare database name is kept as the label
      # instead of ending up as nil.
      user_db_name = @options[:user_db].sub(/.+\//, '')

      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      unless File.exist?("#{File.expand_path(@options[:user_db])}.psq")
        puts "user database: #{@options[:user_db]} was not found"
        exit
      end

      blastx_and_annotate(seqs, "#{@options[:user_db]}", user_db_name)
      new_seqs = without_complete_annotation(seqs)
    else
      new_seqs = seqs
    end

    # -------------------------------------------- UniProt (sp)
    sp_name = "sp_#{@options[:tax_group]}"
    blastx_and_annotate(new_seqs, File.join(sp_name, "#{sp_name}.fasta"), sp_name)
    new_seqs = without_complete_annotation(seqs)

    # -------------------------------------------- UniProt (tr)
    tr_name = "tr_#{@options[:tax_group]}"
    blastx_and_annotate(new_seqs, File.join(tr_name, "#{tr_name}.fasta"), tr_name)

    # -------------------------------------------- Test Code
    # the sequences without a reliable similarity with an orthologue are processed with Test Code
    testcode_input = seqs.reject { |s| s.get_annotations(:apply_tcode).empty? }
    # activate this line to test tcode, and comment out all lines above in this function
    # testcode_input=seqs
    testcode_input.each { |seq| TestCode.new(seq) }

    # -------------------------------------------- nc RNA
    unknown_seqs = seqs.reject { |s| s.get_annotations(:tcode_unknown).empty? }

    # run blastn
    ncrna_path = File.join('nc_rna_db', 'ncrna_fln_100.fasta')
    my_blast = run_blast(unknown_seqs, ncrna_path, 'blastn', BLASTN_EVALUE)

    # split and parse blast
    unknown_seqs.each_with_index do |seq, i|
      find_nc_rna(seq, my_blast.querys[i])
    end
  end

  private

  # Runs blastx for seqs against db_path and passes every sequence, together
  # with the blast query at the same index, to analiza_orf_y_fl under the
  # label db_name.
  def blastx_and_annotate(seqs, db_path, db_name)
    my_blast = run_blast(seqs, db_path, 'blastx', BLASTX_EVALUE)

    seqs.each_with_index do |seq, i|
      analiza_orf_y_fl(seq, my_blast.querys[i], @options, db_name)
    end
  end

  # Returns the sequences of seqs that have no :complete annotation.
  def without_complete_annotation(seqs)
    seqs.select { |s| s.get_annotations(:complete).empty? }
  end
end