module Stamina
  module Induction

    #
    # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
    # heuristics).
    #
    # See Lang, K., B. Pearlmutter, andR. Price. 1998. Results of the Abbadingo One DFA
    # Learning Competition and a New Evidence-Driven State Merging Algorithm, In Grammatical
    # Inference, pp. 1–12. Ames, IO: Springer-Verlag.
    #
    # Example:
    #   # sample typically comes from an ADL file
    #   sample = Stamina::ADL.parse_sample_file('sample.adl')
    #
    #   # let BlueFringe build the smallest dfa
    #   dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
    #
    # Remarks:
    # - Constructor and instance methods of this class are public but not intended
    #   to be used directly. They are left public for testing purposes only.
    # - Having read the Stamina::Induction::BlueFringe base algorithm may help undertanding
    #   this variant.
    # - This class intensively uses the Stamina::Induction::UnionFind class and
    #   methods defined in the Stamina::Induction::Commons module which are worth
    #   reading to understand the algorithm implementation.
    #
    class BlueFringe
      include Stamina::Induction::Commons

      # Union-find data structure used internally
      attr_reader :ufds

      # Creates an algorithm instance with given options.
      def initialize(options={})
        raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
        @options = DEFAULT_OPTIONS.merge(options)
        @score_cache = {}
      end

      #
      # Computes the score of a single (group) merge. Returned value is 1 if both are
      # accepting states or both are error states and 0 otherwise. Note that d1 and d2
      # are expected to be merge compatible as this method does not distinguish this
      # case.
      #
      def merge_score(d1, d2)
        # Score of 1 if both accepting or both error
        ((d1[:accepting] and d2[:accepting]) or (d1[:error] and d2[:error])) ? 1 : 0
      end

      #
      # Merges a state of rank j with a state of lower rank i. This merge method
      # includes merging for determinization. It returns nil if the merge is
      # incompatible, a merge score otherwise.
      #
      # Preconditions:
      # - States denoted by i and j are expected leader states (non merged ones)
      # - States denoted by i and j are expected to be different
      #
      # Postconditions:
      # - Union find is refined, states i and j having been merged, as well as all
      #   state pairs that need to be merged to ensure the deterministic property
      #   of the quotient automaton.
      # - If the resulting quotient automaton is consistent with the negative sample,
      #   this method returns the number of accepting pairs + the number of error pairs
      #   that have been merged. The refined union-find correctly encodes the quotient
      #   automaton. Otherwise, the method returns nil and the union-find information
      #   must be considered inaccurate.
      #
      def merge_and_determinize(i, j)
        # Make the union (keep merging score as well as additional merges to be performed
        # in score and determinization, respectively). Recompute the user data attached to
        # the new state group (new_data)
        determinization, score = [], nil
        @ufds.union(i, j) do |d1, d2|
          # states are incompatible if new_data cannot be created because it would
          # lead to merge and error and an accepting state. We simply return nil in this
          # case...
          return nil unless (new_data = merge_user_data(d1, d2, determinization))
          # otherwise, we score
          score = merge_score(d1, d2)
          # and we let the union find keep the new_data for the group
          new_data
        end

        # Merge for determinization starts here, based on the determinization array
        # computed as a side effect of merge_user_data
        determinization.each do |pair|
          # we take the leader states of the pair to merge
          pair = pair.collect{|i| @ufds.find(i)}
          # do nothing if already the same leader state
          next if pair[0]==pair[1]
          # otherwise recurse and keep subscore
          subscore = merge_and_determinize(pair[0], pair[1])
          # failure if merging for determinization led to merge error and accepting
          # states
          return nil if subscore.nil?
          # this is the new score
          score += subscore
        end

        score
      end

      #
      # Evaluates the score of merging states i and j. Returns nil if the states are
      # cannot be merged, a positive score otherwise.
      #
      # Preconditions:
      # - States denoted by i and j are expected leader states (non merged ones)
      # - States denoted by i and j are expected to be different
      #
      # Postconditions:
      # - Returned value is nil if the quotient automaton would be incompatible with
      #   the sample. Otherwise a positive number is returned, encoding the number of
      #   interresting pairs that have been merged (interesting = both accepting or both
      #   error)
      # - The union find is ALWAYS restored to its previous value after merging has
      #   been evaluated and is then seen unchanged by the caller.
      #
      def merge_and_determinize_score(i, j)
        score = @score_cache[[i,j]] ||= begin
          # score the merging, always rollback the transaction
          score = nil
          @ufds.transactional do
            score = merge_and_determinize(i, j)
            false
          end
          score || -1
        end
        score == -1 ? nil : score
      end

      #
      # Computes the fringe given the current union find. The fringe is returned as an
      # array of state indices.
      #
      # Postconditions:
      # - Returned array contains indices of leader states only.
      # - Returned array is disjoint with the kernel.
      #
      def fringe
        fringe = []
        @kernel.each do |k1|
          delta = @ufds.mergeable_data(k1)[:delta]
          delta.each_pair{|symbol, target| fringe << @ufds.find(target)}
        end
        (fringe - @kernel).sort
      end

      #
      # Main method of the algorithm. Refines the union find passed as first argument
      # by merging well chosen state pairs. Returns the refined union find.
      #
      # Preconditions:
      # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
      #   and :error boolean flags as well as a :delta sub hash)
      #
      # Postconditions:
      # - The union find has been refined. It encodes a quotient automaton (of the PTA
      #   it comes from) such that all positive and negative strings of the underlying
      #   sample are correctly classified by it.
      #
      def main(ufds)
        info("Starting BlueFringe (#{ufds.size} states)")
        @ufds, @kernel, @score_cache = ufds, [0], {}

        # we do it until the fringe is empty (compute it only once each step)
        until (the_fringe=fringe).empty?
          # state to consolidate (if any)
          to_consolidate = nil
          # best candidate [source index, target index, score]
          best = [nil, nil, -1]

          # for each state on the fringe as merge candidate
          the_fringe.each do |candidate|
            to_consolidate = candidate

            # evaluate score of merging candidate with each kernel state
            @kernel.each do |target|
              score = merge_and_determinize_score(candidate, target)
              unless score.nil?
                # if a score has been found, the candidate will not be
                # consolidated. We keep it as best if its better than the
                # previous one
                to_consolidate = nil
                best = [candidate, target, score] if score > best[2]
              end
            end

            # No possible target, break the loop (will consolidate right now)!
            break unless to_consolidate.nil?
          end

          # If not found, the last candidate must be consolidated. Otherwise, we
          # do the best merging
          unless to_consolidate.nil?
            info("Consolidation of #{to_consolidate}")
            @kernel << to_consolidate
          else
            @score_cache.clear
            info("Merging #{best[0]} and #{best[1]} [#{best[2]}]")
            # this one should never fail because its score was positive before
            raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
          end

          # blue_fringe does not guarantee that it will not merge a state of lower rank
          # with a kernel state. The kernel should then be update at each step to keep
          # lowest indices for the whole kernel, and we sort it
          @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
        end

        # return the refined union find now
        @ufds
      end

      #
      # Build the smallest DFA compatible with the sample given as input.
      #
      # Preconditions:
      # - The sample is consistent (does not contains the same string both labeled as
      #   positive and negative) and contains at least one string.
      #
      # Postconditions:
      # - The returned DFA is the smallest DFA that correctly labels the learning sample
      #   given as input.
      #
      # Remarks:
      # - This instance version of BlueFringe.execute is not intended to be used directly and
      #   is mainly provided for testing purposes. Please use the class variant of this
      #   method if possible.
      #
      def execute(sample)
        # create union-find
        info("Creating PTA and UnionFind structure")
        ufds = sample2ufds(sample)
        # refine it
        ufds = main(ufds)
        # compute and return quotient automaton
        ufds2dfa(ufds)
      end

      #
      # Build the smallest DFA compatible with the sample given as input.
      #
      # Options (the _options_ hash):
      # - :verbose can be set to true to trace algorithm execution on standard output.
      #
      # Preconditions:
      # - The sample is consistent (does not contains the same string both labeled as
      #   positive and negative) and contains at least one string.
      #
      # Postconditions:
      # - The returned DFA is the smallest DFA that correctly labels the learning sample
      #   given as input.
      #
      def self.execute(sample, options={})
        BlueFringe.new(options).execute(sample)
      end

    end # class BlueFringe

  end # module Induction
end # module Stamina