Sha256: b1ffcaf4013ad9df24e2dcfddfac681f94d4383d261c4364a647b3f780ebaefe

Contents?: true

Size: 1.64 KB

Versions: 1

Compression:

Stored size: 1.64 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    class RuleSet

      class << self
        def create(locale, boundary_type, options = {})
          new(locale, StateMachine.instance(boundary_type, locale), options)
        end
      end

      attr_reader :locale, :state_machine
      attr_accessor :use_uli_exceptions

      alias_method :use_uli_exceptions?, :use_uli_exceptions

      def initialize(locale, state_machine, options)
        @locale = locale
        @state_machine = state_machine
        @use_uli_exceptions = options.fetch(
          :use_uli_exceptions, false
        )
      end

      def each_boundary(str)
        return to_enum(__method__, str) unless block_given?

        cursor = Cursor.new(str)

        # Let the state machine find the first boundary for the line
        # boundary type. This helps pass nearly all the Unicode
        # segmentation tests, so it must be the right thing to do.
        # Normally the first boundary is the implicit start of text
        # boundary, but potentially not for the line rules?
        yield 0 unless state_machine.boundary_type == 'line'

        until cursor.eos?
          state_machine.handle_next(cursor)
          yield cursor.position if suppressions.should_break?(cursor)
        end
      end

      def boundary_type
        state_machine.boundary_type
      end

      private

      def suppressions
        @suppressions ||= if use_uli_exceptions?
          Suppressions.instance(boundary_type, locale)
        else
          NullSuppressions.instance
        end
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
twitter_cldr-5.2.0 lib/twitter_cldr/segmentation/rule_set.rb