require 'forwardable'
# Statsample: must be loaded before /ext/array as they modify a built-in class
require 'statsample'

require 'genevalidator/blast'
require 'genevalidator/exceptions'
require 'genevalidator/ext/array'
require 'genevalidator/output'
require 'genevalidator/pool'
require 'genevalidator/query'
require 'genevalidator/validation_maker_qi'
require 'genevalidator/validation_length_cluster'
require 'genevalidator/validation_length_rank'
require 'genevalidator/validation_blast_reading_frame'
require 'genevalidator/validation_gene_merge'
require 'genevalidator/validation_duplication'
require 'genevalidator/validation_open_reading_frame'
require 'genevalidator/validation_alignment'

# Top level module / namespace.
module GeneValidator
  # Lightweight (x, y) value pair. Validate#generate_run_overview stores one
  # per validation header in the run-time overview: +x+ accumulates the run
  # time and +y+ counts the runs that contributed to it.
  Pair1 = Struct.new(:x, :y)

  # Class that initialises a separate Validate.new() instance for each query.
  class Validations
    extend Forwardable
    def_delegators GeneValidator, :opt, :config, :query_idx
    # Caches the shared GeneValidator state (options, configuration and query
    # index, all obtained through the delegators declared on this class) in
    # instance variables.
    def initialize
      @opt, @config, @query_idx = opt, config, query_idx
    end

    ##
    # Walks through the queries recorded in @query_idx, fetches the BLAST hits
    # of each one from +iterator+ and validates it. Validations run inline, or
    # on a thread pool when more than one thread was requested.
    # Stops early (and rewinds @config[:idx] by one) as soon as no further
    # BLAST results can be parsed.
    # Params:
    # +iterator+: iterator over the BLAST output, as expected by
    # #parse_next_iteration
    def run_validations(iterator)
      # A pool is only created when it is actually needed; +pool+ stays nil
      # otherwise and every query is then validated inline.
      pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1

      check_if_maker_input?

      while @config[:idx] + 1 < @query_idx.length
        prediction = get_info_on_query_sequence
        @config[:idx] += 1

        blast_hits = parse_next_iteration(iterator, prediction)

        if blast_hits.nil?
          @config[:idx] -= 1
          break
        end

        if pool
          # Pass the values through the scheduler so that each job works on
          # its own copy of the references rather than on the loop variables.
          pool.schedule(prediction, blast_hits,
                        @config[:idx]) do |pred, hits, idx|
            Validate.new.validate(pred, hits, idx)
          end
        else
          Validate.new.validate(prediction, blast_hits, @config[:idx])
        end
      end
    ensure
      # Guard on the pool itself: if Pool.new raised, there is nothing to shut
      # down and the original exception must not be masked by a NoMethodError.
      pool.shutdown if pool
    end

    ##
    # Reads the query record that starts at @query_idx[@config[:idx]] from the
    # input FASTA file and wraps it in a Query object.
    # Params:
    # +input_file+: path of the FASTA file (default: @opt[:input_fasta_file])
    # +seq_type+: type of the sequence (default: @config[:type]); the stored
    # length is divided by 3 when it is +:nucleotide+
    # Output:
    # the populated +Query+ object
    def get_info_on_query_sequence(input_file = @opt[:input_fasta_file],
                                   seq_type = @config[:type])
      # Two neighbouring entries of @query_idx delimit one record: the first
      # is the byte offset to read from, their difference is the byte count.
      idx    = @config[:idx]
      offset = @query_idx[idx]
      length = @query_idx[idx + 1] - offset
      record = IO.binread(input_file, length, offset)
      # fields[0] is the definition line, fields[1] the (multi-line) sequence.
      fields = record.scan(/>([^\n]*)\n([A-Za-z\n]*)/).first

      query                = Query.new
      query.definition     = fields[0].delete("\n")
      # The identifier is everything before the first space.
      query.identifier     = query.definition.gsub(/ .*/, '')
      query.type           = seq_type
      query.raw_sequence   = fields[1].delete("\n")
      query.length_protein = query.raw_sequence.length
      query.length_protein /= 3 if seq_type == :nucleotide
      query
    end

    # Adds 'maker_qi' to @opt[:validations] if the first definition line of
    # the input fasta file contains MAKER's QI (quality index) tag, i.e. "QI:"
    # followed by nine pipe-separated numbers.
    # Params:
    # +input_file+: path of the FASTA file (default: @opt[:input_fasta_file])
    # Output:
    # @opt[:validations] when 'maker_qi' was added, +nil+ otherwise
    def check_if_maker_input?(input_file = @opt[:input_fasta_file])
      query      = IO.binread(input_file, @query_idx[1], @query_idx[0])
      definition = query[/>([^\n]*)\n/, 1]
      return if definition.nil? # no FASTA header, so nothing to inspect

      # QI fields can be negative: MAKER writes -1 for values it cannot
      # compute (e.g. splice-site fractions of single-exon genes), so an
      # optional leading minus sign has to be accepted.
      number     = '-?\d*\.?\d*'
      qi_pattern = /QI:#{Array.new(9, number).join('\|')}/
      return unless definition =~ qi_pattern
      # Avoid a duplicate entry when the validation is already enabled or
      # when this check runs more than once.
      return if @opt[:validations].include?('maker_qi')
      @opt[:validations] << 'maker_qi'
    end

    # Fetches the BLAST hits of the next query from +iterator+.
    # While @config[:idx] is still below @config[:start_idx] the iterator is
    # advanced once before parsing.
    # Params:
    # +iterator+: iterator over the BLAST output
    # +prediction+: the current query; its identifier selects the rows of a
    # tabular BLAST file
    # Output:
    # the parsed hits, or +nil+ when neither a BLAST XML nor a tabular file is
    # configured in @opt
    def parse_next_iteration(iterator, prediction)
      before_start = @config[:idx] < @config[:start_idx]
      iterator.next if before_start

      return BlastUtils.parse_next(iterator) if @opt[:blast_xml_file]
      return unless @opt[:blast_tabular_file]
      iterator.parse_next(prediction.identifier)
    end
  end

  # Class that runs the validations (Instantiated for each query)
  class Validate
    extend Forwardable
    def_delegators GeneValidator, :opt, :config, :mutex_array, :overview,
                   :query_idx

    ##
    # Initializes the object.
    # Takes no arguments: the shared GeneValidator state (options,
    # configuration, overview, query index and the mutex that guards updates
    # of the overview) is fetched through the delegators declared on this
    # class. The per-query output (@run_output) is only created by #validate.
    # NOTE(review): earlier documentation described +opt+ as a hash with the
    # keys validations:, blast_tabular_file:, blast_tabular_options:,
    # blast_xml_file:, db:, raw_sequences:, num_threads: and fast: -- confirm
    # against GeneValidator.opt.
    def initialize
      @opt, @config = opt, config
      @mutex_array = mutex_array
      @run_output = nil
      @overview = overview
      @query_idx = query_idx
    end

    ##
    # Validates a single query and produces its validation report: identical
    # hits are dropped, the enabled validations are created, checked and run,
    # their reports are collected in a new Output object, and finally the
    # scores and the output are generated.
    # Params:
    # +prediction+: Sequence object
    # +hits+: Array of +Sequence+ objects
    # +current_idx+: the index number of the query
    def validate(prediction, hits, current_idx)
      hits  = remove_identical_hits(prediction, hits)
      tests = create_validation_tests(prediction, hits)
      check_validations(tests)
      tests.each { |t| t.run }

      @run_output = Output.new(current_idx, hits.length, prediction.definition)
      @run_output.validations = tests.map { |t| t.validation_report }
      check_validations_output(tests)

      compute_scores
      generate_run_output
    end

    ##
    # Removes the hits that are identical to the query, i.e. hits whose HSPs
    # all have an identity of at least 99% and together cover every position
    # of the prediction. The +hits+ array is modified in place.
    # Params:
    # +prediction+: Sequence object
    # +hits+: Array of +Sequence+ objects
    # Output:
    # the same +hits+ array, without the identical hits
    def remove_identical_hits(prediction, hits)
      identical_hits = hits.select do |hit|
        # An HSP without identity data cannot prove that the hit is identical.
        # The nil check has to come first, as nil cannot be compared to 99.
        high_identity = hit.hsp_list.none? do |hsp|
          hsp.pidentity.nil? || hsp.pidentity < 99
        end

        # Mark every query position that is covered by at least one HSP.
        coverage = Array.new(prediction.length_protein, 0)
        hit.hsp_list.each do |hsp|
          match_to   = hsp.match_query_to
          match_from = hsp.match_query_from
          len        = match_to - match_from + 1
          coverage[match_from - 1..match_to - 1] = Array.new(len, 1)
        end

        # Require real coverage: an all-zero array (e.g. a hit without HSPs)
        # is uniform as well, but it is the opposite of 100% coverage.
        high_identity && coverage.uniq == [1]
      end

      identical_hits.each { |hit| hits.delete(hit) }
      hits
    end

    # Instantiates every known validation for the given query and keeps only
    # those whose (downcased) cli_name is listed in @opt[:validations].
    # The nucleotide-only validations are inserted before the alignment
    # validation by #init_nucleotide_only_validations.
    # Params:
    # +prediction+: Sequence object
    # +hits+: Array of +Sequence+ objects
    # Output:
    # Array of the enabled validation objects
    def create_validation_tests(prediction, hits)
      tests = [MakerQIValidation, LengthClusterValidation, LengthRankValidation,
               GeneMergeValidation, DuplicationValidation].map do |klass|
        klass.new(prediction, hits)
      end
      init_nucleotide_only_validations(tests, prediction, hits)
      tests << AlignmentValidation.new(prediction, hits)

      enabled = @opt[:validations]
      tests.select { |t| enabled.include?(t.cli_name.downcase) }
    end

    # Appends the validations that only apply to nucleotide queries (reading
    # frame and open reading frame) to +val+ when @config[:type] is
    # +:nucleotide+; leaves +val+ untouched otherwise.
    # Output:
    # +val+ when the validations were appended, +nil+ otherwise
    def init_nucleotide_only_validations(val, prediction, hits)
      if @config[:type] == :nucleotide
        val << BlastReadingFrameValidation.new(prediction, hits)
        val << OpenReadingFrameValidation.new(prediction, hits)
      end
    end

    # Sanity-checks the validation objects before they are run: every element
    # must be a ValidationTest and no two may share the same cli_name.
    # On a violation the error is printed to stderr and the process exits
    # with status 1.
    # Params:
    # +vals+: Array of validation objects
    def check_validations(vals)
      # every element has to be a validation test
      vals.each do |v|
        raise ValidationClassError unless v.is_a?(ValidationTest)
      end
      # command line aliases have to be unique
      names = vals.map { |v| v.cli_name }
      raise AliasDuplicationError if names.uniq.length != names.length
    rescue ValidationClassError, AliasDuplicationError => e
      $stderr.puts e
      exit 1
    end

    # Sanity-checks the results after the validations ran: @run_output must
    # hold at least one report and every validation must have produced a
    # ValidationReport. On a violation the error is printed to stderr and the
    # process exits with status 1.
    # Params:
    # +vals+: Array of validation objects that have been run
    def check_validations_output(vals)
      raise NoValidationError if @run_output.validations.empty?
      vals.each do |v|
        report = v.validation_report
        raise ReportClassError unless report.is_a?(ValidationReport)
      end
    rescue NoValidationError, ReportClassError => e
      $stderr.puts e
      exit 1
    end

    # Computes the number of successes and fails and the overall score of the
    # current query and stores them in @run_output.
    # A validation is a success when its result equals the expected result; it
    # is a fail when it differs and the validation was neither unapplicable
    # nor errored. Both counts are then adjusted by #length_validation_scores.
    # The overall score is the share of successes among the counted
    # validations, scaled by 90 and rounded to the nearest integer; it is 0
    # when nothing could be counted.
    # NOTE(review): the factor 90 is kept from the existing implementation --
    # confirm that the maximum score is meant to be 90 rather than 100.
    def compute_scores
      validations        = @run_output.validations
      scores             = {}
      scores[:successes] = validations.count { |v| v.result == v.expected }
      scores[:fails]     = validations.count do |v|
        v.validation != :unapplicable && v.validation != :error &&
          v.result != v.expected
      end
      scores = length_validation_scores(validations, scores)

      @run_output.successes = scores[:successes]
      @run_output.fails     = scores[:fails]

      # Both counts may be fractional after the length adjustment, so the
      # total must not be truncated and the division has to be done on
      # floats: integer division would cut the score off before rounding.
      total = scores[:successes] + scores[:fails]
      @run_output.overall_score =
        if total > 0
          (scores[:successes] * 90.0 / total).round
        else
          0
        end
    end

    # Since there are two length validations, it is necessary to adjust the
    #   scores accordingly: together they count as a single validation.
    # The adjustment is only made when exactly one report of each length
    # validation is present, and only for outcomes that were actually counted
    # by #compute_scores: a success is a report whose result equals the
    # expected result, a fail is a differing report whose validation is
    # neither +:unapplicable+ nor +:error+.
    # Params:
    # +validations+: Array of validation reports
    # +scores+: Hash with the +:successes+ and +:fails+ counts (modified in
    # place)
    # Output:
    # the adjusted +scores+ hash
    def length_validation_scores(validations, scores)
      lcv = validations.select { |v| v.class == LengthClusterValidationOutput }
      lrv = validations.select { |v| v.class == LengthRankValidationOutput }
      return scores unless lcv.length == 1 && lrv.length == 1

      outcomes = [lcv[0], lrv[0]].map do |v|
        if v.result == v.expected
          :success
        elsif v.validation != :unapplicable && v.validation != :error
          :fail
        end # nil: the report was not counted at all
      end
      passed = outcomes.count(:success)
      failed = outcomes.count(:fail)

      if passed == 2
        scores[:successes] -= 1 # if both are true: counted as 1 success
      elsif failed == 2
        scores[:fails] -= 1 # if both are false: counted as 1 fail
      elsif passed == 1 && failed == 1
        scores[:successes] -= 0.5
        scores[:fails] -= 0.5
      end
      # Otherwise at most one of the two was counted, so there is nothing to
      # merge; subtracting anyway would produce negative or lost counts.
      scores
    end

    # Writes the report of the current query in every supported format (HTML,
    # JSON and console, in that order) and then folds the results into the
    # overall run overview.
    def generate_run_output
      report = @run_output
      report.generate_html
      report.generate_json
      report.print_output_console

      generate_run_overview
    end

    # Folds the results of the current query into the shared @overview:
    # query count, score lists, "not enough evidence" count, missing
    # MAFFT / internet errors, errors per validation and accumulated run
    # times. All updates of @overview happen while holding @mutex_array.
    def generate_run_overview
      reports       = @run_output.validations
      mafft_missing = 0
      offline       = 0
      reports.each do |report|
        raised = report.errors
        next if raised.nil?
        mafft_missing += raised.count { |e| e == NoMafftInstallationError }
        offline       += raised.count { |e| e == NoInternetError }
      end
      failed_headers = reports.select { |r| r.validation == :error }
                              .map { |r| r.short_header }

      # The query lacks evidence when no validation produced a real result.
      lacks_evidence = reports.all? do |r|
        r.result == :unapplicable || r.result == :warning
      end
      nee = lacks_evidence ? 1 : 0

      passed      = @run_output.overall_score >= 75
      good_scores = passed ? 1 : 0
      bad_scores  = passed ? 0 : 1

      @mutex_array.synchronize do
        @overview[:no_queries] += 1
        @overview[:scores].push(@run_output.overall_score)
        @overview[:good_scores] += good_scores
        @overview[:bad_scores] += bad_scores
        @overview[:nee] += nee
        @overview[:no_mafft] += mafft_missing
        @overview[:no_internet] += offline
        failed_headers.each { |header| @overview[:map_errors][header] += 1 }

        reports.each do |report|
          elapsed = report.run_time
          next if elapsed.nil? || elapsed == 0
          next if report.validation == :unapplicable
          next if report.validation == :error

          # x accumulates the run time, y the number of timed runs.
          header = report.short_header
          so_far = @overview[:run_time][header]
          @overview[:run_time][header] = Pair1.new(so_far.x + elapsed,
                                                   so_far.y + 1)
        end
      end
    end
  end
end