# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    # Pairs a locale and boundary type with an ordered list of rules, and
    # walks a string reporting the positions at which those rules (or the
    # builder's implicit fallback rules) place a break.
    class RuleSet

      # Forwards every argument, unchanged, to RuleSetBuilder.load and
      # returns whatever it returns.
      def self.load(*args)
        RuleSetBuilder.load(*args)
      end

      attr_reader :locale, :rules, :boundary_type
      attr_accessor :use_uli_exceptions

      alias_method :use_uli_exceptions?, :use_uli_exceptions

      # locale, rules and boundary_type are stored as given. options must
      # respond to #fetch; its :use_uli_exceptions entry (false when the
      # key is absent) seeds the use_uli_exceptions flag.
      def initialize(locale, rules, boundary_type, options)
        @locale, @rules, @boundary_type = locale, rules, boundary_type
        @use_uli_exceptions = options.fetch(:use_uli_exceptions, false)
      end

      # Yields each boundary position found in str. Position 0 is always
      # yielded first, and str.size is yielded last unless it was already
      # the most recently yielded break. Without a block, an enumerator
      # over the same sequence is returned instead.
      def each_boundary(str)
        return to_enum(__method__, str) unless block_given?

        cursor = Cursor.new(str)
        latest_break = 0

        # the start of the text is always a boundary
        yield 0

        until cursor.eof?
          found = find_match(cursor)
          rule = found.rule
          boundary = found.boundary_position

          if rule.break?
            yield boundary
            latest_break = boundary
          end

          move_cursor_to(cursor, boundary)
        end

        # the end of the text is always a boundary
        yield str.size unless latest_break == str.size
      end

      private

      # Moves the cursor up to the given boundary. A boundary sitting at
      # the cursor's current position would not move it at all, so in
      # that case the cursor takes its default step.
      def move_cursor_to(cursor, boundary)
        if boundary == cursor.position
          cursor.advance
        else
          cursor.advance(boundary - cursor.position)
        end
      end

      # Yields the exception rule first (only when ULI exceptions are
      # switched on and this boundary type supports them), followed by
      # every entry in rules. Returns an enumerator when called without
      # a block.
      def each_rule(&block)
        return to_enum(__method__) unless block_given?

        yield exception_rule if use_uli_exceptions? && supports_exceptions?

        rules.each(&block)
      end

      # Memoized result of asking RuleSetBuilder for the exception rule
      # matching this locale and boundary type.
      def exception_rule
        @exception_rule ||=
          RuleSetBuilder.exception_rule_for(locale, boundary_type)
      end

      # Only the 'sentence' boundary type takes part in exceptions.
      def supports_exceptions?
        boundary_type == 'sentence'
      end

      # Returns the match to apply at the cursor's position. When neither
      # the cache nor the rules produce one, falls back to the builder's
      # implicit end-of-text rule (cursor at end of string) or its
      # implicit final rule (anywhere else).
      def find_match(cursor)
        located = find_cached_match(cursor)
        return located if located

        fallback_rule =
          if cursor.eos?
            RuleSetBuilder.implicit_end_of_text_rule
          else
            RuleSetBuilder.implicit_final_rule
          end

        fallback_rule.match(cursor)
      end

      # Looks up the cursor's match cache under the current position. On
      # a miss, every rule is run; each resulting match is remembered
      # under (its boundary position - 1) unless that slot is already
      # filled, and the first match (nil if there were none) is returned.
      def find_cached_match(cursor)
        cursor.match_cache.fetch(cursor.position) do
          candidates = match_all(cursor)

          candidates.each do |candidate|
            slot = candidate.boundary_position - 1
            cursor.match_cache[slot] ||= candidate
          end

          candidates.first
        end
      end

      # Runs every rule from each_rule against the cursor and collects
      # the truthy results, preserving rule order.
      def match_all(cursor)
        hits = []

        each_rule do |rule|
          hit = rule.match(cursor)
          hits << hit if hit
        end

        hits
      end
    end
  end
end