Sha256: 2df9a76ef9f75b7450618f1bd1e523b2a3a7fd564afd4fedf67d63d185855223
Contents?: true
Size: 1.91 KB
Versions: 27
Compression:
Stored size: 1.91 KB
Contents
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'singleton'

module TwitterCldr
  module Segmentation
    # Holds a forward trie and a backward trie and uses them to decide whether
    # a candidate boundary at a cursor position should be kept (see
    # #should_break?).
    #
    # Instances are obtained through Suppressions.instance(boundary_type, locale),
    # which memoizes one object per resource path.
    class Suppressions
      include Singleton

      class << self
        # Returns the suppressions object for the given boundary type and locale.
        #
        # When no matching resource file exists, NullSuppressions.instance is
        # returned instead. Otherwise the resource is loaded once, its
        # :forwards_trie and :backwards_trie entries are unmarshalled, and the
        # resulting object is cached under the resource path.
        def instance(boundary_type, locale)
          path = find_resource(boundary_type, locale)

          if path
            store = cache
            existing = store[path]
            return existing if existing

            resource = TwitterCldr.get_resource(path)
            forwards = Marshal.load(resource[:forwards_trie])
            backwards = Marshal.load(resource[:backwards_trie])

            store[path] = new(forwards, backwards)
          else
            NullSuppressions.instance
          end
        end

        private

        # Builds the resource path for the boundary type / locale pair and
        # returns it only if TwitterCldr reports that the resource exists;
        # returns nil otherwise.
        def find_resource(boundary_type, locale)
          segments = ['shared', 'segments', 'suppressions', locale, boundary_type]
          candidate = TwitterCldr.resource_file_path(segments)

          TwitterCldr.resource_exists?(candidate) ? candidate : nil
        end

        # Lazily created hash mapping resource path => Suppressions object.
        def cache
          @cache ||= {}
        end
      end

      attr_reader :forward_trie, :backward_trie

      # forward_trie / backward_trie: objects responding to #root, whose nodes
      # respond to #child(codepoint) and #value.
      def initialize(forward_trie, backward_trie)
        @forward_trie, @backward_trie = forward_trie, backward_trie
      end

      # Returns false when the text around the cursor matches a suppression,
      # true otherwise.
      #
      # The check has two phases:
      #   1. Walk the backward trie while moving left through the text. If the
      #      walk leaves the text or the trie before reaching a node with a
      #      value, there is nothing to suppress and the result is true.
      #   2. From the index where phase 1 stopped, walk the forward trie while
      #      moving right. Reaching a node with a value yields false; running
      #      off the end of the text or out of the trie yields true.
      def should_break?(cursor)
        offset = cursor.position

        # If the codepoint just before the cursor is a space (32), start two
        # codepoints back so the walk begins before the space, e.g. at the '.'
        # in "Mr. Brown".
        offset -= 2 if cursor.codepoint(offset - 1) == 32

        # Phase 1: scan leftwards through the backward trie.
        current = backward_trie.root
        matched = false

        while offset >= 0 && offset < cursor.length
          current = current.child(cursor.codepoint(offset))
          break unless current

          if current.value
            matched = true
            break
          end

          offset -= 1
        end

        return true unless matched

        # Phase 2: scan rightwards through the forward trie, starting at the
        # index where the backward scan stopped.
        current = forward_trie.root

        while offset < cursor.length
          current = current.child(cursor.codepoint(offset))
          return true unless current
          return false if current.value

          offset += 1
        end

        true
      end
    end
  end
end
Version data entries
27 entries across 27 versions & 1 rubygems