Sha256: e5e2b570fd5f39bb7554442d9f4b2b9b5f1461dff7b3cfb7a53851daa42f5aa2
Contents?: true
Size: 1.89 KB
Versions: 26
Compression:
Stored size: 1.89 KB
Contents
# encoding: UTF-8 # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 module TwitterCldr module Segmentation class PossibleWord # list size, limited by the maximum number of words in the dictionary # that form a nested sequence. POSSIBLE_WORD_LIST_MAX = 20 def initialize @lengths = [] @count = nil @offset = -1 end # fill the list of candidates if needed, select the longest, and return the number found def candidates(cursor, dictionary, end_pos) start = cursor.position if start != @offset @offset = start @count, _, @lengths, @prefix = dictionary.matches( cursor, end_pos - start, POSSIBLE_WORD_LIST_MAX ) # dictionary leaves text after longest prefix, not longest word, so back up. if @count <= 0 cursor.position = start end end if @count > 0 cursor.position = start + @lengths[@count - 1] end @current = @count - 1 @mark = @current return @count end # select the currently marked candidate, point after it in the text, and invalidate self def accept_marked(cursor) cursor.position = @offset + @lengths[@mark] @lengths[@mark] end # back up from the current candidate to the next shorter one; return true if that exists # and point the text after it def back_up(cursor) if @current > 0 @current -= 1 cursor.position = @offset + @lengths[@current] return true end false end # return the longest prefix this candidate location shares with a dictionary word def longest_prefix @prefix end # mark the current candidate as the one we like def mark_current @mark = @current end end end end
Version data entries
26 entries across 26 versions & 1 rubygems