Sha256: 5d80ca28b1f3aee9f129f8681dc5637df273ddf67baa66c21627b4660c2c7adc

Contents?: true

Size: 1.56 KB

Versions: 14

Compression:

Stored size: 1.56 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Tokenizes number format patterns (e.g. "#,##0.00").
    #
    # Single-quoted special characters in a pattern are swapped for
    # placeholders before the pattern is handed to PatternTokenizer, and the
    # placeholders are swapped back to the bare characters in the resulting
    # token values.
    class NumberTokenizer

      # Characters that, when wrapped in single quotes in a pattern, are
      # replaced by the mapped placeholder before tokenizing.
      SPECIAL_SYMBOLS_MAP = {
        '.' => '{DOT}',
        ',' => '{COMMA}',
        '0' => '{ZERO}',
        '#' => '{POUND}',
        '¤' => '{CURRENCY}', # U+00A4 CURRENCY SIGN
        '%' => '{PERCENT}',
        'E' => '{SCIENTIFIC}'
      }.freeze

      # Matches any key of SPECIAL_SYMBOLS_MAP surrounded by single quotes.
      SPECIAL_SYMBOLS_REGEX = /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/

      # Placeholder => original character, used to undo the substitution.
      INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert.freeze

      # Matches any placeholder produced by SPECIAL_SYMBOLS_MAP.
      INVERSE_SPECIAL_SYMBOLS_REGEX = /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/

      attr_reader :data_reader

      # @param data_reader the object passed through to PatternTokenizer.
      def initialize(data_reader)
        @data_reader = data_reader
      end

      # Tokenizes a number pattern.
      #
      # @param pattern [String] the number format pattern.
      # @return [Array] the tokens produced by PatternTokenizer, with
      #   placeholders in each token's value replaced by the original
      #   character, and without a leading token whose value is empty.
      #   Returns the (empty) list unchanged when no tokens were produced.
      def tokenize(pattern)
        # 'X' (quotes included) becomes the placeholder for X, so the quoted
        # character is not seen as pattern syntax by the tokenizer regexes.
        escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
          SPECIAL_SYMBOLS_MAP[match[1..-2]]
        end

        tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

        # Restore the bare characters (without the surrounding quotes).
        tokens.each do |token|
          token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
            INVERSE_SPECIAL_SYMBOLS_MAP[match]
          end
        end

        # Guard against an empty token list before inspecting the first token.
        first_token = tokens.first

        if first_token && first_token.value == ""
          tokens[1..-1]
        else
          tokens
        end
      end

      private

      # Lazily builds and memoizes the Tokenizer passed to PatternTokenizer.
      #
      # NOTE(review): the :pattern recognizer accepts `?` while the splitter
      # regex excludes `*` from the surrounding text -- confirm this
      # asymmetry is intended.
      def tokenizer
        @tokenizer ||= Tokenizer.new([
          TokenRecognizer.new(:pattern, /[0?#,\.]+/),
          TokenRecognizer.new(:plaintext, //),
        ], /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/, false)
      end

    end
  end
end

Version data entries

14 entries across 14 versions & 2 rubygems

Version Path
solidus_backend-1.0.0.pre3 vendor/bundle/gems/twitter_cldr-3.2.1/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
solidus_backend-1.0.0.pre2 vendor/bundle/gems/twitter_cldr-3.2.1/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
solidus_backend-1.0.0.pre vendor/bundle/gems/twitter_cldr-3.1.2/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.2.1 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.2.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.1.2 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.1.1 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.1.0 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.10 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.9 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.8 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.7 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.6 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
twitter_cldr-3.0.5 lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb