# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Parsers
    class UnicodeRegexParser

      # Component backing the unicode_char, escaped_char, string and
      # multichar_string token types. It may appear both inside and
      # outside of character classes.
      class UnicodeString < Component
        attr_reader :codepoints

        # Stores the given codepoints as-is; no copying or validation is done.
        def initialize(codepoints)
          @codepoints = codepoints
        end

        # Builds a RangeSet containing a single range whose start and end
        # are the same value.
        #
        # When there is more than one codepoint, the whole codepoints
        # collection is used as that value so the sequence is kept together
        # as one group (e.g. a multichar string). Otherwise the first
        # codepoint is used.
        #
        # HACK: the multi-codepoint case puts something that is not a true
        # integer range into the RangeSet, which means RangeSet needs special
        # logic for such data types. No better way of supporting multichar
        # strings is known.
        def to_set
          endpoint = codepoints.size > 1 ? codepoints : codepoints.first
          TwitterCldr::Utils::RangeSet.new([endpoint..endpoint])
        end

        # Passes the codepoints to array_to_regex, wrapping them in a
        # one-element array first if they are not already an Array.
        def to_regexp_str
          return array_to_regex(codepoints) if codepoints.is_a?(Array)

          array_to_regex([codepoints])
        end
      end

    end
  end
end