Sha256: 15c3b8295a0df9a7917783609e1052c971dea682905454228d8c85ea0f2c8dfd

Contents?: true

Size: 1.56 KB

Versions: 45

Compression:

Stored size: 1.56 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Tokenizes number format patterns.
    #
    # Special symbols that appear wrapped in single quotes in a pattern
    # (for example '.' or '#') are swapped for placeholder strings before the
    # pattern is handed to PatternTokenizer, and swapped back to the bare
    # symbol in every resulting token value.
    class NumberTokenizer

      # Symbol => placeholder. The placeholders contain none of the characters
      # used by the splitter regex in #tokenizer (0 # , .).
      SPECIAL_SYMBOLS_MAP = {
        '.' => '{DOT}',
        ',' => '{COMMA}',
        '0' => '{ZERO}',
        '#' => '{POUND}',
        '¤' => '{CURRENCY}', # U+00A4 CURRENCY SIGN
        '%' => '{PERCENT}',
        'E' => '{SCIENTIFIC}'
      }.freeze

      # Matches any key of SPECIAL_SYMBOLS_MAP wrapped in single quotes.
      SPECIAL_SYMBOLS_REGEX = /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/

      # Placeholder => symbol, used to restore token values after tokenizing.
      INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert.freeze

      # Matches any placeholder produced from SPECIAL_SYMBOLS_MAP.
      INVERSE_SPECIAL_SYMBOLS_REGEX = /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/

      attr_reader :data_reader

      # @param data_reader the object passed through to PatternTokenizer
      def initialize(data_reader)
        @data_reader = data_reader
      end

      # Splits +pattern+ into tokens.
      #
      # @param pattern [String] the number format pattern
      # @return [Array] the tokens returned by PatternTokenizer, with
      #   placeholders in their values replaced by the original symbols and a
      #   leading empty-valued token removed
      def tokenize(pattern)
        # match includes the surrounding quotes; [1..-2] strips them.
        escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
          SPECIAL_SYMBOLS_MAP[match[1..-2]]
        end

        tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

        tokens.each do |token|
          token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
            INVERSE_SPECIAL_SYMBOLS_MAP[match]
          end
        end

        # Guard against an empty token list: calling #value on nil would raise.
        first_token = tokens.first

        if first_token && first_token.value == ""
          tokens[1..-1]
        else
          tokens
        end
      end

      private

      # Lazily builds and memoizes the underlying Tokenizer.
      # NOTE(review): the :pattern recognizer accepts '?' while the splitter
      # regex excludes '*' from plaintext - confirm both are intended.
      # The trailing +false+ argument's meaning is defined by Tokenizer.
      def tokenizer
        @tokenizer ||= Tokenizer.new([
          TokenRecognizer.new(:pattern, /[0?#,\.]+/),
          TokenRecognizer.new(:plaintext, //),
        ], /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/, false)
      end

    end
  end
end

Version data entries

45 entries across 45 versions & 2 rubygems

Version Path
twitter_cldr-6.12.1 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.12.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.5 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.4 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.3 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.2 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.1 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.11.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.10.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.9.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.8.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.7.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.6.2 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.6.1 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.6.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.5.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.4.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.3.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.2.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-6.1.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb