Sha256: 09cc4662253188a32d72b4a8b91f129891fb5f434f01fbc1d2c8efbe40daa00d

Contents?: true

Size: 1.73 KB

Versions: 6

Compression:

Stored size: 1.73 KB

Contents

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Shared
    # Wraps a list of parsed regex elements (each responding to
    # +to_regexp_str+) and lazily turns them into a native Regexp.
    class UnicodeRegex

      # Single-character modifier flags and the Regexp option bit each enables.
      MODIFIER_FLAGS = {
        "i" => Regexp::IGNORECASE,
        "m" => Regexp::MULTILINE,
        "x" => Regexp::EXTENDED
      }.freeze

      class << self

        # Tokenizes and parses +str+ and wraps the result in a new UnicodeRegex.
        #
        # str          - pattern source handed to the tokenizer.
        # modifiers    - regexp modifiers forwarded to the new instance
        #                (see #initialize); defaults to "" (no flags).
        # symbol_table - forwarded to the parser as its :symbol_table option.
        #
        # Returns a UnicodeRegex.
        def compile(str, modifiers = "", symbol_table = nil)
          new(
            parser.parse(tokenizer.tokenize(str), {
              :symbol_table => symbol_table
            }), modifiers
          )
        end

        # All unicode characters
        def all_unicode
          @all_unicode ||= TwitterCldr::Utils::RangeSet.new(
            [0..0x10FFFF]
          )
        end

        # A few <control> characters (i.e. 2..7) and the surrogate code points
        # (i.e. 55296..57343, 0xD800..0xDFFF). These don't play nicely with
        # Ruby's regular expression engine, so they are deliberately left out.
        def invalid_regexp_chars
          @invalid_regexp_chars ||= TwitterCldr::Utils::RangeSet.new(
            [2..7, 55296..57343]
          )
        end

        # Every code point in all_unicode minus invalid_regexp_chars (memoized).
        def valid_regexp_chars
          @valid_regexp_chars ||= all_unicode.subtract(invalid_regexp_chars)
        end

        private

        # Memoized tokenizer shared by all calls to compile.
        def tokenizer
          @tokenizer ||= TwitterCldr::Tokenizers::UnicodeRegexTokenizer.new
        end

        # Memoized parser shared by all calls to compile.
        def parser
          @parser ||= TwitterCldr::Parsers::UnicodeRegexParser.new
        end

      end

      extend Forwardable
      def_delegator :to_regexp, :match
      def_delegator :to_regexp, :=~

      attr_reader :elements, :modifiers

      # elements  - objects responding to +to_regexp_str+.
      # modifiers - nil, an Integer bitmask of Regexp options, or a String made
      #             of the flag characters "i", "m" and "x".
      def initialize(elements, modifiers = nil)
        @elements = elements
        # Keep what the caller passed; this used to be overwritten with nil,
        # which silently discarded every modifier.
        @modifiers = modifiers
      end

      # Returns the memoized native Regexp built from to_regexp_str and the
      # options derived from modifiers.
      def to_regexp
        @regexp ||= Regexp.new(to_regexp_str, regexp_options)
      end

      # Returns the memoized concatenation of every element's regexp source.
      def to_regexp_str
        @regexp_str ||= elements.map(&:to_regexp_str).join
      end

      private

      # Converts modifiers into an Integer bitmask for Regexp.new.
      #
      # An explicit bitmask is always passed because Regexp.new interprets
      # String options differently across Ruby versions (before 3.2 any
      # truthy non-Integer value, including "", enabled IGNORECASE).
      # Unrecognised flag characters are ignored so callers that previously
      # passed arbitrary strings keep working.
      def regexp_options
        return modifiers if modifiers.is_a?(Integer)

        modifiers.to_s.each_char.inject(0) do |options, flag|
          options | MODIFIER_FLAGS.fetch(flag, 0)
        end
      end

    end
  end
end

Version data entries

6 entries across 6 versions & 2 rubygems

Version Path
solidus_backend-1.0.0.pre3 vendor/bundle/gems/twitter_cldr-3.2.1/lib/twitter_cldr/shared/unicode_regex.rb
solidus_backend-1.0.0.pre2 vendor/bundle/gems/twitter_cldr-3.2.1/lib/twitter_cldr/shared/unicode_regex.rb
solidus_backend-1.0.0.pre vendor/bundle/gems/twitter_cldr-3.1.2/lib/twitter_cldr/shared/unicode_regex.rb
twitter_cldr-3.2.1 lib/twitter_cldr/shared/unicode_regex.rb
twitter_cldr-3.2.0 lib/twitter_cldr/shared/unicode_regex.rb
twitter_cldr-3.1.2 lib/twitter_cldr/shared/unicode_regex.rb