#finds the classes that were in the folder 'classes'

# ROOT_PATH=File.dirname(File.dirname(File.dirname(__FILE__)))
# 
# $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
# $: << File.expand_path(File.join(ROOT_PATH, 'classes','blast'))
# 
# #finds the classes that were in the folder 'plugins'
# $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
# 
# #finds the classes that were in the folder 'actions'
# $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
# 
# #finds the classes that were in the folder 'utils'
# $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
# 
# $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
# 
# $: << File.expand_path(ROOT_PATH)

# ROOT_PATH is used below but is not defined in this file (its former
# definition is commented out above) -- presumably this require provides
# it; confirm against the seqtrimnext library.
require 'seqtrimnext'

# Global copy of ROOT_PATH.
$SEQTRIM_PATH = ROOT_PATH


# Resolve the database locations:
# * when the BLASTDB environment variable is set, it is taken as the
#   formatted-database directory and its parent directory as the DB directory;
# * otherwise both default to "DB/formatted" and "DB" under ROOT_PATH.
# The existence check on the BLASTDB directory is deliberately left disabled.
if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB'])
  $FORMATTED_DB_PATH = ENV['BLASTDB']
  $DB_PATH = File.dirname($FORMATTED_DB_PATH)
else
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted'))
  $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))  
end

# Write the resolved path back to the environment so that BLASTDB is always
# set from here on, even when it was not set initially.
ENV['BLASTDB']=$FORMATTED_DB_PATH

# Leading path component of every output file name built by SeqtrimWorker.
OUTPUT_PATH='output_files'

# Report the formatted-database path in use on standard output.
puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}"
# $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')

# Libraries required by SeqtrimWorker below.
require 'scbi_mapreduce'
require 'params'
require 'action_manager'
require 'plugin_manager'
# require 'sequence_with_action'
# 
require 'scbi_fastq'
require 'sequence_group'

# Worker that runs the configured plugins over a batch of sequences and
# collects, for every sequence, the lines destined for each output file.
#
# The hooks +receive_initial_config+, +starting_worker+, +process_object+ and
# +closing_worker+ are presumably invoked by ScbiMapreduce::Worker -- confirm
# against the scbi_mapreduce documentation.
#
# Nothing is written to disk by this class: every "file" is an Array of
# entries stored in a Hash keyed by the destination path (see #get_file).
class SeqtrimWorker < ScbiMapreduce::Worker
  # Processes one work unit.
  #
  # obj - raw data handed to SequenceGroup.new.
  #
  # Returns the SequenceGroup after the plugins have run and its output
  # data has been filled in.
  def process_object(obj)
    running_seqs = SequenceGroup.new(obj)

    # execute plugins
    @plugin_manager.execute_plugins(running_seqs)

    # add output data
    add_output_data(running_seqs)

    running_seqs
  end

  # Stores the parameters object and caches the 'use_qual' and 'use_json'
  # settings read from it.
  #
  # obj - object responding to +get_param+.
  def receive_initial_config(obj)
    # Reads the parameters
    $WORKER_LOG.info 'Params received'
    @params = obj

    @use_qual = @params.get_param('use_qual')
    @use_json = @params.get_param('use_json')
  end

  # Creates the action manager and the plugin manager.
  #
  # Failures are reported on standard output and then swallowed (best
  # effort). Only StandardError and ScriptError (e.g. a LoadError) are
  # rescued, so that Interrupt, SystemExit and NoMemoryError still propagate.
  #
  # NOTE(review): after a failure @plugin_manager may be left unset, in which
  # case #process_object will fail later -- confirm this is acceptable.
  def starting_worker
    # $WORKER_LOG.level = Logger::ERROR
    $WORKER_LOG.level = Logger::WARN
    $WORKER_LOG.info 'Loading actions'

    @action_manager = ActionManager.new

    $WORKER_LOG.info 'Loading plugins'
    @plugin_list = @params.get_param('plugin_list')
    # Interpolation keeps this line from raising when the value is not a String.
    $WORKER_LOG.info "PLUGIN LIST:#{@plugin_list}"

    # The plugin manager receives the plugin list and the parameters.
    @plugin_manager = PluginManager.new(@plugin_list, @params)
  rescue StandardError, ScriptError => e
    puts "#{e.message}\n#{Array(e.backtrace).join("\n")}"
  end

  # Hook with nothing to do.
  def closing_worker; end

  # Fills +obj.output_text+ with the text form of every sequence, records the
  # per-file output of every sequence, and finally removes the sequences from
  # the group since they are not needed anymore to write output files.
  #
  # obj - the group of sequences; it must provide +each+, +output_text=+,
  #       +output_files+, +stats+ and +remove_all_seqs+.
  def add_output_data(obj)
    obj.output_text = []

    obj.each do |seq|
      obj.output_text << seq.to_text
      write_seq_to_files(obj.output_files, seq, obj.stats)
    end

    obj.remove_all_seqs
  end

  # Adds +count+ to the counter stored at stats[key][subkey][value], creating
  # the intermediate hashes and a zero counter when they are missing.
  #
  # Returns the updated counter.
  def add_stat(stats, key, subkey, value, count = 1)
    stats[key] ||= {}
    stats[key][subkey] ||= {}
    stats[key][subkey][value] ||= 0

    stats[key][subkey][value] += count
  end

  # Appends the output of one sequence to the proper entries of +files+ and
  # updates +stats+ accordingly.
  #
  # * rejected sequence, or no inserts -> one line in the rejected file
  # * two inserts (paired sequences)   -> two fastq records in the paired file
  # * one insert                       -> one fastq record plus one sff-info
  #                                       line, in the normal or the
  #                                       low-complexity files
  #
  # A non-rejected sequence with more than two inserts produces no output
  # and no stats.
  #
  # files - Hash of path => Array (see #get_file).
  # seq   - the sequence to write.
  # stats - nested Hash of counters (see #add_stat).
  def write_seq_to_files(files, seq, stats)
    dir_name, file_name = seq.get_file_tag_path

    # get current inserts
    inserts = seq.get_inserts

    # qualities are optional
    qual_inserts = seq.get_qual_inserts if @use_qual

    # save json if necessary
    json_file(files) << seq.to_json if @use_json

    # find mids
    mid = seq.get_actions(ActionMid).first

    if seq.seq_rejected
      reject_seq(files, stats, seq, seq.seq_rejected_by_message)
    elsif inserts.empty?
      reject_seq(files, stats, seq, 'No valid inserts found')
    elsif inserts.count == 2 # PAIRED SEQUENCES
      add_stat(stats, 'sequences', 'count', 'output_seqs_paired')

      # TODO - Add this stats to full stats
      # @@full_stats.add_stats({'sequences' => {'paired' => {'count' => 1}}})

      mid_id, _mid_message = mid_id_and_message(mid)
      output = paired_file(files, dir_name, file_name)

      # Left read: first insert, reverse-complemented (qualities reversed).
      left_quals = @use_qual ? qual_inserts[0].reverse : []
      output << FastqFile.to_fastq("#{seq.seq_name}_left",
                                   inserts[0].reverse.tr('actgACTG', 'tgacTGAC'),
                                   left_quals,
                                   "template=#{seq.seq_name} dir=R library=#{mid_id}")

      # Right read: second insert, written as is.
      right_quals = @use_qual ? qual_inserts[1] : []
      output << FastqFile.to_fastq("#{seq.seq_name}_right",
                                   inserts[1],
                                   right_quals,
                                   "template=#{seq.seq_name} dir=F library=#{mid_id}")
    elsif inserts.count == 1
      _mid_id, mid_message = mid_id_and_message(mid)

      # Sequences with a low-complexity action go to their own pair of files.
      if seq.get_actions(ActionLowComplexity).empty?
        add_stat(stats, 'sequences', 'count', 'output_seqs')
        fasta_file = sequence_file(files, dir_name, file_name)
        sff_file = sffinfo_file(files, dir_name, file_name)
      else
        add_stat(stats, 'sequences', 'count', 'output_seqs_low_complexity')
        fasta_file = low_complexity_file(files, dir_name, file_name)
        sff_file = low_sffinfo_file(files, dir_name, file_name)
      end

      quals = @use_qual ? qual_inserts[0] : []
      fasta_file << FastqFile.to_fastq(seq.seq_name, inserts[0], quals, mid_message)

      # The sff-info line holds the insert coordinates shifted by one.
      insert_pos = seq.get_actions(ActionInsert)[0]
      sff_file << "#{seq.seq_name} #{insert_pos.start_pos + 1} #{insert_pos.end_pos + 1}"
    end
  end

  # ACCESS TO FILES
  #
  # Each accessor returns the Array that accumulates the entries of one
  # output file; all paths start with OUTPUT_PATH.

  # Entries of the json results file.
  def json_file(files)
    get_file(files, File.join(OUTPUT_PATH, 'results.json'))
  end

  # Entries of the rejected sequences file.
  def rejected_output_file(files)
    get_file(files, File.join(OUTPUT_PATH, 'rejected.txt'))
  end

  # Entries of the fastq file for single-insert sequences.
  def sequence_file(files, dir_name, file_name)
    get_file(files, File.join(OUTPUT_PATH, dir_name, 'sequences_' + file_name + '.fastq'))
  end

  # Entries of the fastq file for paired sequences.
  def paired_file(files, dir_name, file_name)
    get_file(files, File.join(OUTPUT_PATH, dir_name, 'paired_' + file_name + '.fastq'))
  end

  # Entries of the fastq file for low-complexity sequences.
  def low_complexity_file(files, dir_name, file_name)
    get_file(files, File.join(OUTPUT_PATH, dir_name, 'low_complexity_' + file_name + '.fastq'))
  end

  # Entries of the sff-info file for single-insert sequences.
  def sffinfo_file(files, dir_name, file_name)
    get_file(files, File.join(OUTPUT_PATH, dir_name, 'sff_info_' + file_name + '.txt'))
  end

  # Entries of the sff-info file for low-complexity sequences.
  def low_sffinfo_file(files, dir_name, file_name)
    get_file(files, File.join(OUTPUT_PATH, dir_name, 'low_complexity_sff_info_' + file_name + '.txt'))
  end

  # Returns the Array stored in +files+ under +fn+, storing a new empty one
  # first when there is none yet.
  def get_file(files, fn)
    files[fn] ||= []
  end

  private

  # Records +seq+ as rejected: one ">name message" line in the rejected file
  # plus the per-message and the global rejected counters.
  def reject_seq(files, stats, seq, message)
    rejected_output_file(files) << ">#{seq.seq_name} #{message}"

    add_stat(stats, 'sequences', 'rejected', message)
    add_stat(stats, 'sequences', 'count', 'rejected')
  end

  # Returns the pair [mid_id, mid_message] for a MID action.
  #
  # Without a MID (nil action, or an action whose message is 'no_MID') the
  # placeholders 'no_MID' and ' No MID found' are returned. Otherwise the id
  # is the action's tag id, and the message is the action's message preceded
  # by a space -- or empty when the tag id is the empty string.
  def mid_id_and_message(mid)
    return ['no_MID', ' No MID found'] if mid.nil? || mid.message == 'no_MID'

    mid_id = mid.tag_id
    mid_message = mid_id == '' ? '' : " #{mid.message}"

    [mid_id, mid_message]
  end
end