Sha256: 97c67220601a90adb48e092cebdd3a270ec916be54f15360ab6e3bc736e83315

Contents?: true

Size: 1.69 KB

Versions: 19

Compression:

Stored size: 1.69 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Segmentation
    # Turns the text of a single segmentation rule into a rule object.
    #
    # A rule is written as "<left regex> <boundary symbol> <right regex>",
    # where the boundary symbol is either '÷' (mapped to :break, yielding a
    # BreakRule) or '×' (mapped to :no_break, yielding a NoBreakRule). Either
    # side of the symbol may be empty.
    class Parser

      # Parses one rule string.
      #
      # Only the first boundary symbol in the text is used: the text is split
      # on '÷'/'×' and just the first three pieces (left, symbol, right) are
      # kept.
      #
      # @param text [String] the rule text, e.g. "$CR × $LF".
      # @param options [Hash] forwarded unchanged to the regex parser.
      # @return [BreakRule, NoBreakRule] a new rule built from the compiled
      #   left-hand and right-hand regexes.
      # @raise [ArgumentError] if the text contains neither '÷' nor '×'.
      def parse(text, options = {})
        # The capture group makes String#split keep the matched symbol in the
        # resulting array, between the left and right pieces.
        left_str, boundary_symbol_str, right_str = text.split(/([÷×])/)
        boundary_symbol = boundary_symbol_for(boundary_symbol_str)

        # Fail early and clearly. Without this check the lookup in class_for
        # yields nil and the call to .new below blows up with an unhelpful
        # NoMethodError on nil.
        unless boundary_symbol
          raise ArgumentError,
            "segmentation rule #{text.inspect} contains no boundary symbol (÷ or ×)"
        end

        # split drops a trailing empty piece (and may yield no pieces at all),
        # so a missing side is treated as an empty string.
        left = compile_token_list(tokenize_regex(left_str || ''), options)
        right = compile_token_list(tokenize_regex(right_str || ''), options)

        class_for(boundary_symbol).new(left, right)
      end

      # Tokenizes a regex fragment and drops tokens whose value is empty or
      # whitespace-only.
      #
      # @param text [String] the regex source to tokenize.
      # @return [Array] the remaining tokens, in their original order.
      def tokenize_regex(text)
        regex_tokenizer.tokenize(text).reject do |token|
          token.value.strip.empty?
        end
      end

      private

      # Maps a boundary symbol string to :break or :no_break.
      # Returns nil for anything else (including nil).
      def boundary_symbol_for(str)
        case str
          when '÷' then :break
          when '×' then :no_break
        end
      end

      # Maps :break / :no_break to the rule class to instantiate.
      # Returns nil for any other value.
      def class_for(boundary_symbol)
        case boundary_symbol
          when :break
            BreakRule
          when :no_break
            NoBreakRule
        end
      end

      # Compiles a token list into a UnicodeRegex. An empty list compiles to
      # the regex for the empty string so that callers always get a regex
      # object back.
      def compile_token_list(token_list, options)
        if token_list.empty?
          TwitterCldr::Shared::UnicodeRegex.compile('')
        else
          parse_regex(token_list, options)
        end
      end

      # Builds a UnicodeRegex from already-tokenized input, passing 'm' as the
      # second constructor argument (presumably the multiline modifier -
      # confirm against UnicodeRegex). Returns nil for an empty token list.
      def parse_regex(tokens, options)
        return if tokens.empty?

        TwitterCldr::Shared::UnicodeRegex.new(
          regex_parser.parse(tokens, options), 'm'
        )
      end

      # Lazily created, memoized tokenizer instance.
      def regex_tokenizer
        @tokenizer ||=
          TwitterCldr::Tokenizers::UnicodeRegexTokenizer.new
      end

      # Lazily created, memoized regex parser instance.
      def regex_parser
        @regex_parser ||=
          TwitterCldr::Parsers::UnicodeRegexParser.new
      end

    end
  end
end

Version data entries

19 entries across 19 versions & 2 rubygems

Version Path
metanorma-cli-1.3.4 gems/ruby/2.6.0/gems/twitter_cldr-4.4.5/lib/twitter_cldr/segmentation/parser.rb
metanorma-cli-1.3.3.1 gems/ruby/2.6.0/gems/twitter_cldr-4.4.5/lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-5.1.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-5.0.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.5 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.4 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.3 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.2 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.1 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.4.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.3.1 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.3.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.2.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.1.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-4.0.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-3.6.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-3.5.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-3.4.0 lib/twitter_cldr/segmentation/parser.rb
twitter_cldr-3.3.0 lib/twitter_cldr/segmentation/parser.rb