Sha256: 5e241e248fe9f57fcce5581129812ec76443d5c397d05be2c64f7bc76b827049

Contents?: true

Size: 1.5 KB

Versions: 11

Compression:

Stored size: 1.5 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Splits the text of an RBNF (rule-based number format) rule into typed
    # tokens: special rule descriptors, rule set references, substitution
    # arrows, optional-text brackets, decimal format patterns, plural
    # expressions, the terminating semicolon and, for anything else, plaintext.
    class RbnfTokenizer

      # Tokenizes a single rule pattern.
      #
      # pattern - the rule text to tokenize.
      #
      # Returns the result of PatternTokenizer#tokenize for the pattern.
      # NOTE(review): nil is passed as PatternTokenizer's first argument;
      # presumably no data reader is needed here - confirm against
      # PatternTokenizer.
      def tokenize(pattern)
        PatternTokenizer.new(nil, tokenizer).tokenize(pattern)
      end

      private

      # Builds the underlying Tokenizer on first use and memoizes it on the
      # instance.
      #
      # The splitter regex is the alternation of every recognizer's regex
      # wrapped in a capture group, so alternatives are tried in the order the
      # recognizers are listed. The catch-all :plaintext recognizer is appended
      # after the splitter is built, so it never takes part in splitting.
      def tokenizer
        @tokenizer ||= begin
          recognizers = [
            # special rule descriptors
            TokenRecognizer.new(:negative, /-x/),
            TokenRecognizer.new(:improper_fraction, /x\.x/),
            TokenRecognizer.new(:proper_fraction, /0\.x/),
            TokenRecognizer.new(:master, /x\.0/),

            # normal rule descriptors
            TokenRecognizer.new(:equals, /=/),
            TokenRecognizer.new(:rule, /%%?[[:word:]-]+/),  # e.g. %spellout-numbering, %%2d-year
            TokenRecognizer.new(:right_arrow, />/),
            TokenRecognizer.new(:left_arrow, /</),
            TokenRecognizer.new(:open_bracket, /\[/),
            TokenRecognizer.new(:close_bracket, /\]/),
            TokenRecognizer.new(:decimal, /[0#][0#,\.]+/),

            # Non-greedy on purpose: a plural expression ends at the nearest
            # ")$". A greedy match would fuse two plural expressions in the
            # same rule (and all the text between them) into a single token.
            TokenRecognizer.new(:plural, /\$\(.*?\)\$/),

            # ending
            TokenRecognizer.new(:semicolon, /;/),
          ]

          splitter_source = recognizers.map { |r| r.regex.source }.join("|")
          splitter = Regexp.new("(#{splitter_source})")

          Tokenizer.new(
            recognizers + [
              TokenRecognizer.new(:plaintext, //)  # catch-all
            ], splitter
          )
        end
      end

    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygems

Version Path
twitter_cldr-4.4.4 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.4.3 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.4.2 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.4.1 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.4.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.3.1 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.3.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.2.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.1.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.0.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-3.6.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb