Sha256: ae3976259935104e204f8ccab1e315056c428f6d87a212b18f98a9a1929ef02b

Contents?: true

Size: 1.52 KB

Versions: 25

Compression:

Stored size: 1.52 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    # Wraps a dictionary resource (exposed as +trie+) and finds the dictionary
    # entries that start at a cursor's current position.
    #
    # Named dictionaries are loaded lazily through the class-level accessors
    # and memoized, so each one is loaded at most once per class.
    class Dictionary

      class << self
        # @return [Dictionary] the memoized dictionary loaded from "burmesedict.dump"
        def burmese
          get('burmese')
        end

        # @return [Dictionary] the memoized dictionary loaded from "cjdict.dump"
        def cj
          get('cj')
        end

        # @return [Dictionary] the memoized dictionary loaded from "khmerdict.dump"
        def khmer
          get('khmer')
        end

        # @return [Dictionary] the memoized dictionary loaded from "laodict.dump"
        def lao
          get('lao')
        end

        # @return [Dictionary] the memoized dictionary loaded from "thaidict.dump"
        def thai
          get('thai')
        end

        # Returns the dictionary registered under +name+, loading it on first
        # use from the 'shared/segments/dictionaries' resources.
        #
        # @param name [String] dictionary base name; "dict.dump" is appended
        #   to build the resource file name
        # @return [Dictionary] the same instance on every call for a given name
        def get(name)
          dictionary_cache[name] ||= begin
            resource = TwitterCldr.get_resource(
              'shared', 'segments', 'dictionaries', "#{name}dict.dump"
            )

            # NOTE(review): the loaded resource is used directly as the trie;
            # presumably the dump deserializes to a trie object - confirm
            # against the resource loader.
            new(resource)
          end
        end

        private

        # Per-class memo of already-loaded dictionaries, keyed by name.
        def dictionary_cache
          @dictionary_cache ||= {}
        end
      end

      attr_reader :trie

      # @param trie [#root] object whose +root+ node responds to +child+,
      #   +has_value?+ and +value+
      def initialize(trie)
        @trie = trie
      end

      # Walks the trie along the code points read from +cursor+ and collects
      # every entry encountered on the way.
      #
      # @param cursor [#length, #position, #codepoint] text cursor; the first
      #   code point is read with +cursor.codepoint+ and subsequent ones with
      #   +cursor.codepoint(cursor.position + offset)+
      # @param max_search_length [Integer] maximum number of code points to
      #   examine
      # @param limit [Integer] maximum number of entries to collect
      # @return [Array(Integer, Array, Array<Integer>, Integer)] a 4-tuple of
      #   the number of entries found, their values, their lengths in code
      #   points, and the number of code points examined
      def matches(cursor, max_search_length, limit)
        # Always answer with the same 4-tuple shape so callers can destructure
        # the result unconditionally; an empty cursor simply yields no entries
        # and zero code points examined.
        return [0, [], [], 0] if cursor.length == 0

        count = 0
        num_chars = 1
        current = trie.root.child(cursor.codepoint)
        values = []
        lengths = []

        until current.nil?
          # Reaching +limit+ only stops collection; the walk itself continues
          # so that num_chars still reflects how far the trie could be followed.
          if current.has_value? && count < limit
            values << current.value
            lengths << num_chars
            count += 1
          end

          break if num_chars >= max_search_length

          current = current.child(
            cursor.codepoint(cursor.position + num_chars)
          )

          # Counts the code point just looked up, even when it had no child
          # node and therefore ends the walk.
          num_chars += 1
        end

        [count, values, lengths, num_chars]
      end

    end
  end
end

Version data entries

25 entries across 25 versions & 1 rubygems

Version Path
twitter_cldr-6.12.1 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.12.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.5 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.4 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.3 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.2 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.1 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.11.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.10.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.9.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.8.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.7.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.6.2 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.6.1 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.6.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.5.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.4.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.3.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.2.0 lib/twitter_cldr/segmentation/dictionary.rb
twitter_cldr-6.1.0 lib/twitter_cldr/segmentation/dictionary.rb