#
# Various methods shared by the pure-Ruby and the extended implementation.
#
# Many of these methods are hotspots, so they are defined directly on
# the including classes for better performance.
#
class CharacterSet
  module SharedMethods
    # Hook run when this module is included. Rather than relying on regular
    # module method lookup, it evaluates the method definitions below as
    # source code in the context of the including class, so that they end up
    # defined directly on that class.
    #
    # NOTE: both heredocs below are interpolated when `included` runs.
    # Anything that must be interpolated later, at call time, is escaped
    # with a leading backslash.
    #
    # @param klass [Class] the class that includes this module
    def self.included(klass)
      klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
        # Raised when an optional dependency can not be loaded.
        LoadError = Class.new(::LoadError)

        class << self
          # Builds a new set from the given arguments.
          def [](*args)
            new(Array(args))
          end

          # Builds a set from any number of Strings and/or Regexps and
          # merges the results. Returns an empty set if no args are given.
          def of(*args)
            args.map do |arg|
              arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
            end.reduce(:merge) || new
          end

          # Builds a set from a bracket expression String. The result is
          # inverted if the expression starts with a negating caret.
          def parse(string)
            codepoints = Parser.codepoints_from_bracket_expression(string)
            result = new(codepoints)
            string.start_with?('[^') ? result.inversion : result
          end

          # Builds a set from the ranges matched by the named property.
          # Needs the optional dependency 'regexp_property_values'.
          def of_property(property_name)
            require_optional_dependency('regexp_property_values', __method__)

            property = RegexpPropertyValues[property_name.to_s]
            from_ranges(*property.matched_ranges)
          end

          # Builds a set from a Regexp by parsing it into an expression tree.
          # Needs the optional dependency 'regexp_parser'.
          def of_regexp(regexp)
            require_optional_dependency('regexp_parser', __method__)

            root = ::Regexp::Parser.parse(regexp)
            of_expression(root)
          end

          # Builds a set from an already parsed expression.
          def of_expression(expression)
            ExpressionConverter.convert(expression, self)
          end

          # Requires the library with the given name once and remembers the
          # success, so that later calls skip the require.
          #
          # Raises this class' own LoadError, naming both the missing
          # library and the calling method, if the library can not be loaded.
          def require_optional_dependency(name, method)
            required_optional_dependencies[name] ||= begin
              require name
              true
            rescue ::LoadError
              raise LoadError, 'You must install the optional dependency '\
                               "'\#{name}' to use the method `\#{method}'."
            end
          end

          # Memo of optional dependencies that were required successfully.
          def required_optional_dependencies
            @required_optional_dependencies ||= {}
          end
        end # class << self

        # Initializes the set with the codepoints of the given enumerable.
        def initialize(enumerable = [])
          merge(Parser.codepoints_from_enumerable(enumerable))
        end

        # Replaces the contents of this set with those of the given enum.
        # Anything that is not exactly an Array, CharacterSet, or Range is
        # converted via the constructor first.
        def replace(enum)
          unless [Array, CharacterSet, Range].include?(enum.class)
            enum = self.class.new(enum)
          end
          clear
          merge(enum)
        end

        # CharacterSet-specific conversion methods

        # Returns the intersection of this set and the class' assigned set.
        def assigned_part
          self & self.class.assigned
        end

        # Returns this set without the members of the class' surrogate set.
        def valid_part
          self - self.class.surrogate
        end

        # CharacterSet-specific stringification methods

        # Stringifies the ranges of this set via Writer, passing on the
        # given options and block.
        def to_s(opts = {}, &block)
          Writer.write(ranges, opts, &block)
        end

        def to_s_with_surrogate_ranges
          Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
        end

        def to_s_with_surrogate_alternation
          Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
        end

        # Returns a String of the given length, made of members of this set
        # that are picked with SecureRandom.
        def secure_token(length = 32)
          CharacterSet.require_optional_dependency('securerandom', __method__)
          cps = to_a
          len = cps.count
          1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
        end
        alias random_token secure_token

        # Shows up to five members and the total size. The class name in
        # the output is that of the including class; it is baked into the
        # string when the module is included.
        def inspect
          len = length
          "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
        end

        # C-extension adapter methods. Need overriding in pure fallback.
        # Parsing kwargs in C is slower, verbose, and kinda deprecated.

        def inversion(include_surrogates: false, upto: 0x10FFFF)
          ext_inversion(include_surrogates, upto)
        end

        def section(from:, upto: 0x10FFFF)
          ext_section(from, upto)
        end

        def count_in_section(from:, upto: 0x10FFFF)
          ext_count_in_section(from, upto)
        end

        def section?(from:, upto: 0x10FFFF)
          ext_section?(from, upto)
        end

        def section_ratio(from:, upto: 0x10FFFF)
          ext_section_ratio(from, upto)
        end

        #
        # The following methods are here for `Set` compatibility, but they are
        # comparatively slow. Prefer others.
        #

        # Replaces each member with the block result for that member.
        # Returns a sized enumerator if no block is given.
        def map!
          block_given? or return enum_for(__method__) { size }

          arr = []
          each { |cp| arr << yield(cp) }
          replace(arr)
        end
        alias collect! map!

        # Like delete_if, but returns nil if the size did not change.
        def reject!(&block)
          block_given? or return enum_for(__method__) { size }

          old_size = size
          delete_if(&block)
          self if size != old_size
        end

        # Like keep_if, but returns nil if the size did not change.
        def select!(&block)
          block_given? or return enum_for(__method__) { size }

          old_size = size
          keep_if(&block)
          self if size != old_size
        end
        alias filter! select!

        # Groups the members by block result. Returns a Hash that maps each
        # block result to a new set of the members that produced it.
        def classify
          block_given? or return enum_for(__method__) { size }

          each_with_object({}) { |cp, h| (h[yield(cp)] ||= self.class.new).add(cp) }
        end

        # Delegates to the pure-Ruby fallback Set, which is loaded lazily.
        def divide(&func)
          require 'character_set/ruby_fallback'
          CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
        end

        def join(separator = '')
          to_a(true).join(separator)
        end
      RUBY

      # CharacterSet-specific section methods

      {
        ascii:  0..0x7F,
        bmp:    0..0xFFFF,
        astral: 0x10000..0x10FFFF,
      }.each do |section_name, range|
        # Inclusive bounds of the codepoints that lie outside of the section.
        # `section?` is called with inclusive bounds (see the `_part` methods
        # below), so the section's own boundary codepoint must be excluded.
        # Otherwise e.g. a set containing only U+007F is not `ascii_only?`.
        outside_from, outside_upto =
          range.begin == 0 ? [range.end + 1, 0x10FFFF] : [0, range.begin - 1]

        klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
          # Returns the part of this set that lies within the section.
          def #{section_name}_part
            section(from: #{range.begin}, upto: #{range.end})
          end

          # Whether any member of this set lies within the section.
          def #{section_name}_part?
            section?(from: #{range.begin}, upto: #{range.end})
          end

          # Whether no member of this set lies outside of the section.
          def #{section_name}_only?
            !section?(from: #{outside_from}, upto: #{outside_upto})
          end

          def #{section_name}_ratio
            section_ratio(from: #{range.begin}, upto: #{range.end})
          end
        RUBY
      end
    end # self.included
  end # SharedMethods
end