Sha256: f10db08d67415b4cbf207d4a19eccaba66c08f5d61f738f2d5e4d9184cf54486
Contents?: true
Size: 1.83 KB
Versions: 3
Compression:
Stored size: 1.83 KB
Contents
# encoding: UTF-8 # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 module TwitterCldr module Shared class Casefolder class << self CASEFOLDING_REGEX_C = /<%= casefolding_char_class_for("C") %>/ CASEFOLDING_REGEX_F = /<%= casefolding_char_class_for("F") %>/ CASEFOLDING_REGEX_S = /<%= casefolding_char_class_for("S") %>/ CASEFOLDING_REGEX_T = /<%= casefolding_char_class_for("T") %>/ CASEFOLDING_HASH = <%= inspect_hash_in_lines(casefolding_hash_for(["C", "F", "S"]), 8, 5) %> CASEFOLDING_HASH_T = CASEFOLDING_HASH.merge( <%= inspect_hash_in_lines(casefolding_hash_for(["T"]), 9, 6) %> ) def simple_casefold(str, t = false) perform_casefold(str, simple_casefold_regex, t) end def full_casefold(str, t = false) perform_casefold(str, full_casefold_regex, t) end alias :casefold :full_casefold def common_casefold(str) perform_casefold(str, CASEFOLDING_REGEX_C, false) end private def perform_casefold(str, regex, t) regex = regex_with_t(regex) if t casefolding_hash = t ? CASEFOLDING_HASH_T : CASEFOLDING_HASH str.gsub(regex) do |s| s.unpack("U*").inject([]) do |ret, ss| ret + casefolding_hash[ss] end.pack("U*") end end def simple_casefold_regex @simple_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_S) end def full_casefold_regex @full_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_F) end def regex_with_t(regex) @regex_with_t_cache[regex.source] ||= Regexp.union(regex, CASEFOLDING_REGEX_T) end end end end end
Version data entries
3 entries across 3 versions & 1 rubygems