Sha256: e5e2b570fd5f39bb7554442d9f4b2b9b5f1461dff7b3cfb7a53851daa42f5aa2

Contents?: true

Size: 1.89 KB

Versions: 26

Compression:

Stored size: 1.89 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    # Remembers the dictionary matches found at one cursor offset and lets the
    # caller walk through them from the longest candidate towards shorter ones.
    class PossibleWord
      # Upper bound on the number of candidates requested from the dictionary
      # (bounded by the maximum number of dictionary words that nest inside
      # one another).
      POSSIBLE_WORD_LIST_MAX = 20

      # Starts with no cached matches; an offset of -1 means the first call
      # to #candidates at any non-negative position queries the dictionary.
      def initialize
        @lengths = []
        @count = nil
        @offset = -1
      end

      # Looks up candidates at the cursor's current position, reusing the
      # cached result when the position equals the offset of the previous
      # lookup. Selects (and marks) the longest candidate, moves the cursor
      # just past it, and returns the number of candidates.
      #
      # cursor     - object responding to #position and #position=
      # dictionary - object responding to #matches(cursor, max_length, limit);
      #              elements 0, 2 and 3 of its result are kept as the count,
      #              the candidate lengths and the prefix, element 1 is ignored
      # end_pos    - position bounding the search; end_pos minus the starting
      #              position is handed to the dictionary as the max length
      def candidates(cursor, dictionary, end_pos)
        origin = cursor.position

        unless origin == @offset
          @offset = origin
          @count, _, @lengths, @prefix = dictionary.matches(
            cursor, end_pos - origin, POSSIBLE_WORD_LIST_MAX
          )

          # the dictionary leaves the cursor after the longest prefix rather
          # than the longest word, so rewind when nothing matched.
          cursor.position = origin if @count <= 0
        end

        # point the cursor just past the longest candidate, if there is one
        cursor.position = origin + @lengths[@count - 1] if @count > 0

        @mark = @current = @count - 1

        @count
      end

      # Moves the cursor to just past the marked candidate and returns that
      # candidate's length.
      def accept_marked(cursor)
        marked_length = @lengths[@mark]
        cursor.position = @offset + marked_length
        marked_length
      end

      # Steps from the current candidate to the next shorter one. Returns true
      # and moves the cursor past that candidate when one exists; otherwise
      # returns false and leaves the cursor where it is.
      def back_up(cursor)
        return false unless @current > 0

        @current -= 1
        cursor.position = @offset + @lengths[@current]

        true
      end

      # Returns the prefix value reported by the dictionary during the most
      # recent lookup (the longest prefix this location shares with a
      # dictionary word).
      def longest_prefix
        @prefix
      end

      # Remembers the current candidate as the preferred one.
      def mark_current
        @mark = @current
      end
    end
  end
end

Version data entries

26 entries across 26 versions & 1 rubygems

Version Path
twitter_cldr-6.13.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.12.1 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.12.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.5 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.4 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.3 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.2 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.1 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.11.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.10.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.9.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.8.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.7.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.6.2 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.6.1 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.6.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.5.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.4.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.3.0 lib/twitter_cldr/segmentation/possible_word.rb
twitter_cldr-6.2.0 lib/twitter_cldr/segmentation/possible_word.rb