Sha256: 0243d33452a06e51f99d5ccca1e430e742c6632bbf47a14f34d43ef024bf6373
Contents?: true
Size: 1.2 KB
Versions: 13
Compression:
Stored size: 1.2 KB
Contents
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Parsers
    class UnicodeRegexParser

      # Helpers that turn codepoints, codepoint arrays, ranges and sets into
      # fragments of regular expression source text. All helpers are
      # protected.
      class Component

        protected

        # Renders one codepoint, or an array of codepoints, as regexp source
        # made of octal byte escapes of the UTF-8 encoding.
        #
        # Every escape is zero-padded to three digits ("\001", not "\1"):
        # outside a character class a short escape such as "\1" is read by
        # the regexp engine as a backreference rather than as a byte.
        #
        # @param codepoints [Integer, Array<Integer>]
        # @return [String] e.g. 65 => "\\101", 0x2000 => "\\342\\200\\200"
        def to_utf8(codepoints)
          # note: we do this for ruby 1.8 compatibility
          # if we didn't have to support 1.8, we could do this instead:
          # Array(codepoints).map { |cp| "\\u{#{cp.to_s(16).rjust(4, "0")}}"}.join
          Array(codepoints).pack("U*").bytes.map do |byte|
            "\\" + byte.to_s(8).rjust(3, "0")
          end.join
        end

        # Renders a range as regexp source. A range whose first element is
        # an Array is handed to array_to_regex; any other range becomes a
        # character class "[first-last]".
        #
        # NOTE(review): the Array branch relies on the range responding to
        # +map+; which range type callers pass here is not visible in this
        # file - confirm.
        #
        # @param range [Range]
        # @return [String]
        def range_to_regex(range)
          if range.first.is_a?(Array)
            array_to_regex(range)
          else
            "[#{to_utf8(range.first)}-#{to_utf8(range.last)}]"
          end
        end

        # Wraps each element of +arr+ in its own non-capturing group and
        # concatenates the groups.
        #
        # @param arr [#map] elements accepted by to_utf8
        # @return [String]
        def array_to_regex(arr)
          arr.map { |elem| "(?:#{to_utf8(elem)})" }.join
        end

        # Renders a set as a non-capturing alternation of its members.
        # Ranges and arrays are rendered by the matching helper; anything
        # else is treated as a codepoint.
        #
        # @param set [#to_a] must accept one argument; presumably the flag
        #   asks for contiguous members to be returned as ranges - confirm
        #   against the set implementation
        # @return [String]
        def set_to_regex(set)
          strs = set.to_a(true).uniq.map do |obj|
            case obj
            when Range
              range_to_regex(obj)
            when Array
              array_to_regex(obj)
            else
              to_utf8(obj)
            end
          end

          "(?:#{strs.join("|")})"
        end

      end
    end
  end
end
Version data entries
13 entries across 13 versions & 1 rubygems