Sha256: 15c3b8295a0df9a7917783609e1052c971dea682905454228d8c85ea0f2c8dfd

Contents?: true

Size: 1.56 KB

Versions: 47

Compression:

Stored size: 1.56 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Tokenizes number format patterns by delegating to PatternTokenizer with
    # a number-specific Tokenizer.
    #
    # A special symbol wrapped in single quotes (for example '#' or '.') is
    # swapped for a placeholder before the pattern is handed to the delegate,
    # and each placeholder found in a resulting token value is swapped back to
    # the bare (unquoted) symbol afterwards.
    class NumberTokenizer

      # Symbols that may appear single-quoted in a pattern, mapped to the
      # placeholder that stands in for them while the pattern is tokenized.
      # Frozen because the regexes below are built from these keys once, at
      # load time; mutating the table later would leave them out of sync.
      SPECIAL_SYMBOLS_MAP = {
        '.' => '{DOT}',
        ',' => '{COMMA}',
        '0' => '{ZERO}',
        '#' => '{POUND}',
        '¤' => '{CURRENCY}', # U+00A4 CURRENCY SIGN
        '%' => '{PERCENT}',
        'E' => '{SCIENTIFIC}'
      }.freeze

      # Matches any one special symbol enclosed in single quotes.
      SPECIAL_SYMBOLS_REGEX = /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/

      # Placeholder => symbol, used to restore symbols after tokenization.
      INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert.freeze

      # Matches any placeholder produced from SPECIAL_SYMBOLS_MAP.
      INVERSE_SPECIAL_SYMBOLS_REGEX = /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/

      attr_reader :data_reader

      # @param data_reader the object forwarded unchanged to PatternTokenizer
      def initialize(data_reader)
        @data_reader = data_reader
      end

      # Tokenizes +pattern+.
      #
      # @param pattern [String] a number format pattern
      # @return [Array] the tokens produced by PatternTokenizer, with
      #   placeholders in each token's value replaced by the symbol they stand
      #   for, and without the first token when its value is the empty string
      def tokenize(pattern)
        # match is the quoted symbol (e.g. "'#'"); [1..-2] strips the quotes.
        escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
          SPECIAL_SYMBOLS_MAP[match[1..-2]]
        end

        tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

        tokens.each do |token|
          token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
            INVERSE_SPECIAL_SYMBOLS_MAP[match]
          end
        end

        # Guard against an empty token list before inspecting the first token.
        first_token = tokens.first

        if first_token && first_token.value == ""
          tokens[1..-1]
        else
          tokens
        end
      end

      private

      # Lazily builds and memoizes the Tokenizer handed to PatternTokenizer.
      # It is constructed with a :pattern recognizer, a :plaintext recognizer
      # (empty regex), a second regex and +false+; what Tokenizer does with
      # those arguments is defined outside this file.
      def tokenizer
        @tokenizer ||= Tokenizer.new([
          TokenRecognizer.new(:pattern, /[0?#,\.]+/),
          TokenRecognizer.new(:plaintext, //),
        ], /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/, false)
      end

    end
  end
end

Version data entries

47 entries across 47 versions & 2 rubygems

Version Path
twitter_cldr-4.2.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-4.1.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-4.0.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.6.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.5.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.4.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.3.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb