require 'json'
require 'scbi_fasta'
require 'sequence'
require 'fln_stats'
include FlnStats

# Work manager for the annotation run: reads sequences from the input FASTA
# file, hands them out as work units one at a time, and writes the
# annotations found on each finished sequence to the result files under
# RESULTS_DIR.
class MyWorkerManager < ScbiMapreduce::WorkManager
  # Directory (relative to the current working directory) holding all results.
  RESULTS_DIR = 'fln_results'

  # Tab-separated column header written as the first line of the tabular
  # result files (dbannotated.txt, new_coding.txt and nc_rnas.txt).
  FILE_HEAD = %w[
    Query_id fasta_length Subject_id db_name Status t_code e_value p_ident
    protein_length s_length Warning_msgs frame ORF_start ORF_end s_start
    s_end Description Protein_sequence
  ].join("\t").freeze

  # Opens the input FASTA file and every result file, and stores the shared
  # state in class variables.
  #
  # options    - option hash; options[:fasta] is the path of the input file.
  #              The whole hash is later returned by worker_initial_config.
  # chunk_size - stored in @@chunk_size; not read anywhere else in this file.
  def self.init_work_manager(options, chunk_size = 100)
    input_file = options[:fasta]

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    Dir.mkdir(RESULTS_DIR) unless File.exist?(RESULTS_DIR)

    @@fasta_file = FastaQualFile.new(input_file, '')
    @@chunk_size = chunk_size
    @@options = options

    @@annotation_file = open_result_file('dbannotated.txt', FILE_HEAD)
    @@alignment_file = open_result_file('alignments.txt')
    @@prot_file = open_result_file('proteins.fasta')
    @@nts_file = open_result_file('nt_seq.txt')
    @@tcode_file = open_result_file('new_coding.txt', FILE_HEAD)
    @@nc_rna_file = open_result_file('nc_rnas.txt', FILE_HEAD)
    # Error output is currently disabled:
    # @@error_fasta_file = File.open("fln_results/error_seqs.fasta", 'w')
    # @@error_file = File.open("fln_results/errors_info.txt", 'w')
  end

  # Opens RESULTS_DIR/<name> for writing (truncating any previous content)
  # and, when a header is given, writes it as the first line.
  # Returns the open File.
  def self.open_result_file(name, header = nil)
    file = File.open(File.join(RESULTS_DIR, name), 'w')
    file.puts header if header
    file
  end
  private_class_method :open_result_file

  # Closes every result file and then calls summary_stats (not defined in
  # this file; presumably supplied by FlnStats - confirm).
  def self.end_work_manager
    # @@fasta_file.close
    [
      @@annotation_file, @@alignment_file, @@prot_file,
      @@nts_file, @@tcode_file, @@nc_rna_file
    ].each { |file| file.close }
    # @@error_fasta_file.close
    # @@error_file.close

    summary_stats
  end

  # Prints the object that failed together with the message and backtrace of
  # the exception wrapped by worker_error.
  def error_received(worker_error, obj)
    exception = worker_error.original_exception
    # Interpolation and Array() keep the report working even when the
    # exception carries no message or no backtrace.
    backtrace = Array(exception.backtrace).join("\n")
    puts "Error while processing object #{obj.inspect}\n#{exception.message}:\n#{backtrace}"
  end

  # Logs the error and sequence counters through $LOG.
  # NOTE(review): @@error_count and @@count are not assigned in this file;
  # presumably they are maintained by ScbiMapreduce::WorkManager - confirm.
  def too_many_errors_received
    $LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
  end

  # Send initial config: returns the option hash stored by init_work_manager.
  def worker_initial_config
    @@options
  end

  # This method is called every time a worker needs a new work.
  # Returns a Sequence built from the next FASTA entry, or nil when the
  # reader has no more entries.
  #
  # An earlier version collected @@chunk_size sequences per call; that code
  # is disabled and a single sequence is returned per call. work_received
  # iterates over what it gets, so grouping presumably happens in the
  # framework - confirm.
  def next_work
    name, fasta, qual = @@fasta_file.next_seq
    return nil if name.nil?

    Sequence.new(name, fasta, qual)
  end

  # This method is executed each time an obj is finished: every element of
  # obj is written out through write_seq.
  def work_received(obj)
    obj.each do |seq|
      # puts seq.seq_name
      write_seq(seq)
    end
  end

  # Writes the annotations of one sequence to the matching result file(s).
  # Only the first annotation type found is handled, checked in this order:
  #   :complete       -> annotation file plus alignment/protein/nucleotide
  #   :tmp_annotation -> annotation file (first element of the message) plus
  #                      non-empty alignment/protein, and nucleotide
  #   :ncrna          -> nc RNA file
  #   :tcode          -> new coding file
  # Any StandardError raised while writing is reported on stdout and not
  # re-raised, so one bad sequence does not abort the whole run.
  def write_seq(seq)
    if (complete = seq.get_annotations(:complete).first)
      # ------------------------------------------------------ Complete Seqs
      @@annotation_file.puts complete[:message]
      write_sequence_details(seq, false)
    elsif (partial = seq.get_annotations(:tmp_annotation).first)
      # ------------------------------------------------------ Non Complete Seqs
      @@annotation_file.puts partial[:message][0]
      write_sequence_details(seq, true)
    elsif (nc_rna = seq.get_annotations(:ncrna).first)
      # ------------------------------------------------------ nc RNA
      @@nc_rna_file.puts nc_rna[:message]
    elsif (tcode = seq.get_annotations(:tcode).first)
      # ------------------------------------------------------ Test Code
      @@tcode_file.puts tcode[:message]
    end
    # ---------------------------------------------------------- errors
    # Error output is currently disabled:
    # if e=seq.get_annotations(:error).first
    #   if !e[:message].empty?
    #     @@error_fasta_file.puts ">#{seq.seq_name}\n#{seq.seq_fasta}"
    #     @@error_file.puts e[:message]
    #   end
    # end
  rescue StandardError => error
    puts "Error printing #{seq.seq_name}"
    # Report the cause as well; otherwise the failure cannot be diagnosed.
    puts "#{error.class}: #{error.message}"
  end

  private

  # Writes the first :alignment, :protein and :nucleotide annotation of seq
  # (in that order) to their files.
  #
  # skip_empty - when true, alignment and protein annotations whose message
  #              is empty are not written. Nucleotide annotations are always
  #              written when present.
  def write_sequence_details(seq, skip_empty)
    [[:alignment, @@alignment_file], [:protein, @@prot_file]].each do |type, file|
      annotation = seq.get_annotations(type).first
      next unless annotation
      next if skip_empty && annotation[:message].empty?

      file.puts annotation[:message]
    end

    nucleotide = seq.get_annotations(:nucleotide).first
    @@nts_file.puts nucleotide[:message] if nucleotide
  end
end