Sha256: d7fb7760544dc36e99f6b114c73a68ee01b4e41abcae5bab2771260661195186

Contents?: true

Size: 1.5 KB

Versions: 32

Compression:

Stored size: 1.5 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Tokenizers
    # Tokenizer for RBNF rule patterns (rule descriptors such as "-x" or
    # "x.0", rule-set references such as "%spellout-numbering", arrows,
    # brackets, decimal patterns, "$(...)$" plural substitutions and the
    # terminating semicolon). Anything not matched by one of those
    # recognizers falls through to a catch-all :plaintext recognizer.
    class RbnfTokenizer

      # Tokenizes +pattern+ by delegating to a PatternTokenizer that wraps
      # the memoized Tokenizer built by #tokenizer.
      #
      # @param pattern [String] the pattern text to tokenize
      # @return the result of PatternTokenizer#tokenize (its shape is defined
      #   by PatternTokenizer, which is not part of this file)
      def tokenize(pattern)
        # The first constructor argument is deliberately nil here; its meaning
        # is defined by PatternTokenizer.
        PatternTokenizer.new(nil, tokenizer).tokenize(pattern)
      end

      private

      # Builds the underlying Tokenizer once per instance and reuses it on
      # subsequent calls.
      #
      # The splitter is derived from the specific recognizers only; the
      # catch-all :plaintext recognizer is appended afterwards so that its
      # empty regex never becomes part of the splitter.
      def tokenizer
        @tokenizer ||= begin
          recognizers = token_recognizers

          Tokenizer.new(
            recognizers + [
              TokenRecognizer.new(:plaintext, //)  # catch-all
            ], build_splitter(recognizers)
          )
        end
      end

      # Returns the ordered list of specific recognizers.
      #
      # Order matters: the regex sources are joined into one alternation, and
      # at any given position the earliest alternative that matches wins
      # (e.g. "0.x" must be tried before the :decimal pattern, which would
      # otherwise consume the leading "0.").
      def token_recognizers
        [
          # special rule descriptors
          TokenRecognizer.new(:negative, /-x/),
          TokenRecognizer.new(:improper_fraction, /x\.x/),
          TokenRecognizer.new(:proper_fraction, /0\.x/),
          TokenRecognizer.new(:master, /x\.0/),

          # normal rule descriptors
          TokenRecognizer.new(:equals, /=/),
          TokenRecognizer.new(:rule, /%%?[[:word:]-]+/),  # e.g. %spellout-numbering, %%2d-year
          TokenRecognizer.new(:right_arrow, />/),
          TokenRecognizer.new(:left_arrow, /</),
          TokenRecognizer.new(:open_bracket, /\[/),
          TokenRecognizer.new(:close_bracket, /\]/),
          TokenRecognizer.new(:decimal, /[0#][0#,\.]*/),
          # Lazy quantifier: stop at the first ")$" so that two plural
          # substitutions in one pattern are not merged into a single token.
          TokenRecognizer.new(:plural, /\$\(.*?\)\$/),

          # ending
          TokenRecognizer.new(:semicolon, /;/),
        ]
      end

      # Joins the recognizers' regex sources into a single capturing
      # alternation. The capture group makes String#split keep the matched
      # delimiters in its result instead of discarding them.
      def build_splitter(recognizers)
        alternation = recognizers.map { |recognizer| recognizer.regex.source }.join("|")
        Regexp.new("(#{alternation})")
      end

    end
  end
end

Version data entries

32 entries across 32 versions & 2 rubygems

Version Path
twitter_cldr-6.1.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
metanorma-cli-1.3.4 gems/ruby/2.6.0/gems/twitter_cldr-4.4.5/lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-6.0.2 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
metanorma-cli-1.3.3.1 gems/ruby/2.6.0/gems/twitter_cldr-4.4.5/lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-6.0.1 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-6.0.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-5.4.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-5.3.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-5.2.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-5.1.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-5.0.0 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb
twitter_cldr-4.4.5 lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb