Sha256: d60ef08efc3d30913e4f681290ce3c43bf7d601fa028d53ec0de42ef9bb04f89
Contents?: true
Size: 1.72 KB
Versions: 4
Compression:
Stored size: 1.72 KB
Contents
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

# Tokenizes number format patterns.
#
# Special symbols that appear single-quoted in a pattern (e.g. `'.'` or `'#'`)
# are swapped for brace-delimited placeholders before the pattern is handed to
# the underlying tokenizer, and swapped back in the resulting token values
# afterwards, so quoted symbols are never matched by the "pattern" recognizer.
class TwitterCldr.NumberTokenizer
  # data_reader - passed through unchanged to TwitterCldr.PatternTokenizer.
  constructor : (@data_reader) ->
    # Symbol -> placeholder. The currency sign is written as an escape
    # (U+00A4) so it survives re-encoding of this source file.
    @special_symbols_map = {
      '.'      : '{DOT}',
      ','      : '{COMMA}',
      '0'      : '{ZERO}',
      '#'      : '{POUND}',
      '\u00A4' : '{CURRENCY}',
      '%'      : '{PERCENT}',
      'E'      : '{SCIENTIFIC}'
    }

    # Placeholder -> symbol, used to restore token values after tokenizing.
    @inverse_special_symbols_map = {}
    for symbol, placeholder of @special_symbols_map
      @inverse_special_symbols_map[placeholder] = symbol

    # NOTE: the comprehensions are wrapped in parentheses, not square brackets.
    # `[x for ...]` would produce a one-element array holding the comprehension,
    # and join('|') would then emit a comma-separated string instead of an
    # alternation.
    quoted_symbols = (
      TwitterCldr.Utilities.regex_escape(symbol) for symbol of @special_symbols_map
    )
    # Matches a special symbol together with its surrounding single quotes.
    # Global so that every occurrence in a pattern is replaced.
    @special_symbols_regex = new RegExp("'(?:" + quoted_symbols.join('|') + ")'", 'g')

    placeholders = (
      TwitterCldr.Utilities.regex_escape(placeholder) for placeholder of @inverse_special_symbols_map
    )
    # Matches any placeholder (braces included). Global for the same reason.
    @inverse_special_symbols_regex = new RegExp(placeholders.join('|'), 'g')

    recognizers = [
      new TwitterCldr.TokenRecognizer("pattern", /[0?#,\.]+/),
      new TwitterCldr.TokenRecognizer("plaintext", /[\s\S]*/),
    ]

    @tokenizer = new TwitterCldr.Tokenizer(
      recognizers, /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/, false
    )

  # Tokenizes `pattern` (a string) and returns the array of tokens produced by
  # TwitterCldr.PatternTokenizer, with placeholders in each token's `value`
  # restored to the original symbols. A leading token whose value is the empty
  # string is dropped. Token values are modified in place.
  tokenize : (pattern) ->
    # Fat arrows keep `@` bound to this instance inside the replace callbacks.
    escaped_pattern = pattern.replace(@special_symbols_regex, (match) =>
      # `match` includes the surrounding quotes; the map is keyed by the bare
      # symbol. The full placeholder (with braces) is inserted so that
      # @inverse_special_symbols_regex can find it again below.
      @special_symbols_map[match.slice(1, -1)]
    )

    tokens = (
      new TwitterCldr.PatternTokenizer(@data_reader, @tokenizer)
    ).tokenize(escaped_pattern)

    for token in tokens
      token.value = token.value.replace(@inverse_special_symbols_regex, (match) =>
        @inverse_special_symbols_map[match]
      )

    # Guard against an empty token list before inspecting the first token.
    if tokens.length > 0 and tokens[0].value is ""
      tokens.slice(1)
    else
      tokens
Version data entries
4 entries across 4 versions & 1 rubygems