lib/twitter_cldr/normalization/hangul.rb in twitter_cldr-2.4.3 vs lib/twitter_cldr/normalization/hangul.rb in twitter_cldr-3.0.0.beta1

- old
+ new

@@ -7,10 +7,27 @@ module Normalization module Hangul class << self + SBASE = 0xAC00 + LBASE = 0x1100 + VBASE = 0x1161 + TBASE = 0x11A7 + + LCOUNT = 19 + VCOUNT = 21 + TCOUNT = 28 + + NCOUNT = VCOUNT * TCOUNT # 588 + SCOUNT = LCOUNT * NCOUNT # 11172 + + LLIMIT = LBASE + LCOUNT # 0x1113 = 4371 + VLIMIT = VBASE + VCOUNT # 0x1176 = 4470 + TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547 + SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204 + # Special composition for Hangul syllables. Documented in Section 3.12 at # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf # def compose(code_points) l = code_points.first - LBASE @@ -22,47 +39,41 @@ # Special decomposition for Hangul syllables. Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf # Also see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm#Hangul_Implicit_CEs # def decompose(code_point) - l = code_point - SBASE + decomposition_cache[code_point] ||= begin + l = code_point - SBASE - t = l % TCOUNT - l /= TCOUNT - v = l % VCOUNT - l /= VCOUNT + t = l % TCOUNT + l /= TCOUNT + v = l % VCOUNT + l /= VCOUNT - result = [] + result = [] - result << LBASE + l - result << VBASE + v - result << TBASE + t if t > 0 + result << LBASE + l + result << VBASE + v + result << TBASE + t if t > 0 - result + result + end end def hangul_syllable?(code_point) (SBASE...SLIMIT).include?(code_point) end - SBASE = 0xAC00 - LBASE = 0x1100 - VBASE = 0x1161 - TBASE = 0x11A7 + private - LCOUNT = 19 - VCOUNT = 21 - TCOUNT = 28 + def syllable_cache + @syllable_cache ||= {} + end - NCOUNT = VCOUNT * TCOUNT # 588 - SCOUNT = LCOUNT * NCOUNT # 11172 + def decomposition_cache + @decomposition_cache ||= {} + end - LLIMIT = LBASE + LCOUNT # 0x1113 = 4371 - VLIMIT = VBASE + VCOUNT # 0x1176 = 4470 - TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547 - SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204 - end - end end end