#finds the classes that were in the folder 'classes' # ROOT_PATH=File.dirname(File.dirname(File.dirname(__FILE__))) # # $: << File.expand_path(File.join(ROOT_PATH, 'classes')) # $: << File.expand_path(File.join(ROOT_PATH, 'classes','blast')) # # #finds the classes that were in the folder 'plugins' # $: << File.expand_path(File.join(ROOT_PATH, 'plugins')) # # #finds the classes that were in the folder 'plugins' # $: << File.expand_path(File.join(ROOT_PATH, 'actions')) # # #finds the classes that were in the folder 'utils' # $: << File.expand_path(File.join(ROOT_PATH, 'utils')) # # $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes')) # # $: << File.expand_path(ROOT_PATH) $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/') $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib') require 'seqtrimnext' $SEQTRIM_PATH = ROOT_PATH if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB']) $FORMATTED_DB_PATH = ENV['BLASTDB'] $DB_PATH = File.dirname($FORMATTED_DB_PATH) else $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted')) $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB")) end ENV['BLASTDB']=$FORMATTED_DB_PATH OUTPUT_PATH='output_files' puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}" require 'scbi_mapreduce' require 'params' require 'action_manager' require 'plugin_manager' # require 'sequence_with_action' # require 'scbi_fastq' require 'sequence_group' class SeqtrimWorker < ScbiMapreduce::Worker def process_object(obj) running_seqs=SequenceGroup.new(obj) # execute plugins @plugin_manager.execute_plugins(running_seqs) # add output data add_output_data(running_seqs) return running_seqs end def receive_initial_config(obj) # Reads the parameters $WORKER_LOG.info "Params received" # @params = Params.new(params_path) @params = obj @use_qual=@params.get_param('use_qual') @use_json=@params.get_param('use_json') end def starting_worker # $WORKER_LOG.level = Logger::ERROR $WORKER_LOG.level = Logger::WARN $WORKER_LOG.info "Loading actions" @action_manager = ActionManager.new $WORKER_LOG.info "Loading plugins" @plugin_list = @params.get_param('plugin_list') # puts in plugin_list the plugins's array $WORKER_LOG.info "PLUGIN LIST:" + @plugin_list @plugin_manager = PluginManager.new(@plugin_list,@params) # creates an instance from PluginManager. This must storage the plugins and load it rescue Exception => e puts (e.message+ e.backtrace.join("\n")) end def closing_worker end def add_output_data(obj) obj.output_text=[] obj.each do |seq| obj.output_text << seq.to_text write_seq_to_files(obj.output_files,seq, obj.stats) end # @remove seqs since they are not needed anymore to write output files obj.remove_all_seqs end def add_stat(stats,key,subkey,value,count=1) stats[key]={} if !stats[key] stats[key][subkey]={} if !stats[key][subkey] stats[key][subkey][value]=0 if !stats[key][subkey][value] stats[key][subkey][value]+=count end def write_seq_to_files(files,seq, stats) # puts stats.to_json dir_name,file_name=seq.get_file_tag_path # puts File.join(dir_name,'sequences_'+file_name) # get current inserts inserts = seq.get_inserts # qualities are optional if @use_qual qual_inserts = seq.get_qual_inserts end # save json if necessary if @use_json json_file(files)<< seq.to_json end # find mids mid = seq.get_actions(ActionMid).first if (seq.seq_rejected) # sequence rejected #save to rejected sequences message = seq.seq_rejected_by_message rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message) add_stat(stats,'sequences','rejected',seq.seq_rejected_by_message) add_stat(stats,'sequences','count','rejected') elsif (inserts.empty?) #sequence with no inserts message = 'No valid inserts found' rejected_output_file(files)<<('>'+seq.seq_name+ ' ' + message) add_stat(stats,'sequences','rejected',message) add_stat(stats,'sequences','count','rejected') elsif (inserts.count == 2) # sequence with two inserts = PAIRED SEQUENCES add_stat(stats,'sequences','count','output_seqs_paired') # TODO - Add this stats to full stats # @@full_stats.add_stats({'sequences' => {'paired' => {'count' => 1}}}) if (mid.nil? || (mid.message=='no_MID') ) # without mid mid_id = 'no_MID' mid_message = ' No MID found' else mid_id = mid.tag_id mid_message='' if mid_id != mid_message mid_message = ' '+mid.message end end # fasta_file = get_paired_file(mid_id) n="#{seq.seq_name}_left" c="template=#{seq.seq_name} dir=R library=#{mid_id}" f=inserts[0].reverse.tr('actgACTG','tgacTGAC') q=[] if @use_qual q=qual_inserts[0].reverse end paired_file(files,dir_name,file_name)<