lib/twitter_cldr/normalization/hangul.rb in twitter_cldr-2.4.3 vs lib/twitter_cldr/normalization/hangul.rb in twitter_cldr-3.0.0.beta1
- old
+ new
@@ -7,10 +7,27 @@
module Normalization
module Hangul
class << self
# Code point bases, counts and exclusive upper limits used by the Hangul
# helpers in this file. The S/L/V/T naming appears to follow Section 3.12 of
# the Unicode Standard referenced by the method comments (TODO confirm).
#
# *BASE  - first code point of a range; decompose subtracts SBASE from its
#          input and adds LBASE/VBASE/TBASE to the indexes it computes.
# *COUNT - number of entries in a range; TCOUNT and VCOUNT are the divisors
#          used by decompose.
# *LIMIT - BASE + COUNT, i.e. one past the last code point of a range; SLIMIT
#          is the exclusive end of the range tested by hangul_syllable?.
+ SBASE = 0xAC00
+ LBASE = 0x1100
+ VBASE = 0x1161
+ TBASE = 0x11A7
+
+ LCOUNT = 19
+ VCOUNT = 21
+ TCOUNT = 28
+
+ NCOUNT = VCOUNT * TCOUNT # 588
+ SCOUNT = LCOUNT * NCOUNT # 11172
+
+ LLIMIT = LBASE + LCOUNT # 0x1113 = 4371
+ VLIMIT = VBASE + VCOUNT # 0x1176 = 4470
+ TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547
+ SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204
+
# Special composition for Hangul syllables. Documented in Section 3.12 at
# http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
#
def compose(code_points)
l = code_points.first - LBASE
@@ -22,47 +39,41 @@
# Special decomposition for Hangul syllables. Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
# Also see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm#Hangul_Implicit_CEs
#
# Splits +code_point+ into two or three code points. The offset of
# +code_point+ from SBASE is reduced first modulo TCOUNT (giving t), then
# modulo VCOUNT (giving v); the remaining quotient is l.
#
# code_point - Integer code point. No range check is performed here;
#              presumably callers test hangul_syllable? first (TODO confirm).
#
# Returns an Array holding LBASE + l and VBASE + v, followed by TBASE + t
# only when t is greater than zero.
#
# The new ("+") version memoizes the Array in decomposition_cache, keyed by
# code_point, so the arithmetic runs once per distinct input.
# NOTE(review): the cached Array object itself is returned, so a caller that
# mutates the result would alter what later calls receive - confirm callers
# treat it as read-only.
def decompose(code_point)
- l = code_point - SBASE
+ decomposition_cache[code_point] ||= begin
+ l = code_point - SBASE
- t = l % TCOUNT
- l /= TCOUNT
- v = l % VCOUNT
- l /= VCOUNT
+ t = l % TCOUNT
+ l /= TCOUNT
+ v = l % VCOUNT
+ l /= VCOUNT
- result = []
+ result = []
- result << LBASE + l
- result << VBASE + v
# The third element is omitted when t is zero.
- result << TBASE + t if t > 0
+ result << LBASE + l
+ result << VBASE + v
+ result << TBASE + t if t > 0
- result
+ result
+ end
end
# Tells whether +code_point+ lies in the half-open range that starts at SBASE
# and stops just before SLIMIT.
#
# code_point - the value to test (an Integer code point in normal use).
#
# Returns true when SBASE <= code_point < SLIMIT, false otherwise.
def hangul_syllable?(code_point)
  syllable_block = SBASE...SLIMIT
  syllable_block.member?(code_point)
end
- SBASE = 0xAC00
- LBASE = 0x1100
- VBASE = 0x1161
- TBASE = 0x11A7
+ private
- LCOUNT = 19
- VCOUNT = 21
- TCOUNT = 28
# Returns the Hash held in @syllable_cache, assigning an empty Hash first
# when the variable is unset (or otherwise nil/false).
# NOTE(review): no caller is visible in this chunk; presumably compose uses
# it to memoize composed syllables - confirm.
+ def syllable_cache
+ @syllable_cache ||= {}
+ end
- NCOUNT = VCOUNT * TCOUNT # 588
- SCOUNT = LCOUNT * NCOUNT # 11172
# Returns the Hash held in @decomposition_cache, assigning an empty Hash
# first when the variable is unset (or otherwise nil/false). decompose
# stores its result Arrays here, keyed by the input code point.
+ def decomposition_cache
+ @decomposition_cache ||= {}
+ end
- LLIMIT = LBASE + LCOUNT # 0x1113 = 4371
- VLIMIT = VBASE + VCOUNT # 0x1176 = 4470
- TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547
- SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204
-
end
-
end
end
end