Sha256: 0fe96eb982a3aedb9aec8deb0dc2e120768cca3c8b8a8d1827a93a6d5eb35fac

Contents?: true

Size: 1.25 KB

Versions: 4

Compression:

Stored size: 1.25 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    # Splits a string into consecutive substrings using the boundary
    # positions reported by a RuleSet loaded for this iterator's locale.
    #
    # Every public iteration method yields each segment to the given block;
    # when no block is given, an Enumerator over the segments is returned.
    class BreakIterator

      attr_reader :locale, :options

      # locale  - locale handed to RuleSet.load (defaults to TwitterCldr.locale).
      # options - passed through unchanged to RuleSet.load.
      def initialize(locale = TwitterCldr.locale, options = {})
        @locale = locale
        @options = options
      end

      # Iterates over the segments of +str+ produced by the 'sentence' rule set.
      def each_sentence(str, &block)
        each_boundary(rule_set_for('sentence'), str, &block)
      end

      # Iterates over the segments of +str+ produced by the 'word' rule set.
      def each_word(str, &block)
        each_boundary(rule_set_for('word'), str, &block)
      end

      # Always raises: grapheme cluster segmentation is not available.
      # NotImplementedError descends from ScriptError, so a bare `rescue`
      # (which only catches StandardError) will not intercept it.
      def each_grapheme_cluster(str, &block)
        raise NotImplementedError, 'Grapheme segmentation is not currently supported.'
      end

      # Always raises: line segmentation is not available.
      # See the NotImplementedError note on each_grapheme_cluster's behaviour:
      # the error is a ScriptError and escapes a bare `rescue`.
      def each_line(str, &block)
        raise NotImplementedError, 'Line segmentation is not currently supported.'
      end

      private

      # Yields the substring lying between each pair of adjacent boundary
      # positions that +rule_set+ reports for +str+. Without a block, returns
      # an Enumerator that re-enters this method lazily.
      def each_boundary(rule_set, str)
        return to_enum(__method__, rule_set, str) unless block_given?

        # each_cons(2) turns the boundary list b0, b1, b2, ... into the
        # pairs (b0, b1), (b1, b2), ...; each pair delimits one segment.
        rule_set.each_boundary(str).each_cons(2) do |from, upto|
          yield str[from...upto]
        end
      end

      # Loads the rule set for +boundary_type+ ('word', 'sentence', ...) using
      # this iterator's locale and options.
      def rule_set_for(boundary_type)
        RuleSet.load(locale, boundary_type, options)
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
twitter_cldr-3.6.0 lib/twitter_cldr/segmentation/break_iterator.rb
twitter_cldr-3.5.0 lib/twitter_cldr/segmentation/break_iterator.rb
twitter_cldr-3.4.0 lib/twitter_cldr/segmentation/break_iterator.rb
twitter_cldr-3.3.0 lib/twitter_cldr/segmentation/break_iterator.rb