require 'pstore'
require 'singleton'

module RegexpExamples
  # Interface to retrieve the characters that match a regex named property,
  # e.g. `/\p{Alpha}/`.
  #
  # The matching code points are kept in a PStore file under the gem's `db`
  # directory, stored compactly as a list whose entries are either a single
  # Integer code point or a collection (e.g. a Range) of code points. One
  # store file exists per Unicode version (see STORE_FILENAME).
  class UnicodeCharRanges
    include Singleton

    # These values were generated by: scripts/unicode_lister.rb
    # Note: Only the first 128 results are listed, for performance.
    # Also, some groups seem to have no matches (weird!)
    STORE_FILENAME = "unicode_ranges_#{RbConfig::CONFIG['UNICODE_VERSION']}.pstore".freeze

    # Captures the dotted version number that directly precedes the
    # `.pstore` extension of a store file name.
    STORE_VERSION_PATTERN = /(\d+(?:\.\d+)*)\.pstore\z/.freeze
    private_constant :STORE_VERSION_PATTERN

    attr_reader :range_store

    # Opens the store selected by #unicode_ranges_file.
    def initialize
      @range_store = PStore.new(unicode_ranges_file)
    end

    # Looks up the characters recorded for a named property.
    #
    # @param key [Object] the key the ranges were stored under
    # @return [Array<String>] one single-character string per code point
    # @raise [NoMethodError] if nothing is stored under +key+ (PStore yields
    #   nil for a missing key, and nil cannot be iterated)
    def get(key)
      # `true` opens the transaction read-only.
      range_store.transaction(true) do
        ranges_to_unicode(range_store[key])
      end
    end
    alias [] get

    private

    # Path of the store file to load.
    #
    # The method is written like this to future-proof it a little,
    # i.e. the gem won't completely break for a new ruby version release:
    # when no store exists for the exact version named by STORE_FILENAME,
    # the newest store that is not newer than that version is used.
    #
    # @return [String, nil] nil when no suitable store file is found
    def unicode_ranges_file
      db_path = File.join(__dir__, '../../db')
      closest_store_file(Dir["#{db_path}/*.pstore"], STORE_FILENAME)
    end

    # Picks, from +files+, the store with the highest version that does not
    # exceed the version embedded in +wanted_filename+.
    #
    # Versions are compared numerically, segment by segment; comparing the
    # file names as plain strings would rank "9.0.0" above "10.0.0".
    # Files without a recognisable version are ignored.
    #
    #   closest_store_file(
    #     %w[db/unicode_ranges_9.0.0.pstore db/unicode_ranges_10.0.0.pstore],
    #     'unicode_ranges_11.0.0.pstore'
    #   ) # => "db/unicode_ranges_10.0.0.pstore"
    #
    # @param files [Array<String>] candidate store paths
    # @param wanted_filename [String] file name carrying the wanted version
    # @return [String, nil] the chosen path, or nil if there is no candidate
    def closest_store_file(files, wanted_filename)
      wanted = store_version(wanted_filename)
      return nil unless wanted

      versioned = files.map { |file| [store_version(file), file] }
      eligible = versioned.select do |version, _file|
        version && (version <=> wanted) <= 0
      end
      best = eligible.max_by(&:first)
      best && best.last
    end

    # Extracts the version of a store file as a list of integer segments,
    # e.g. "unicode_ranges_10.0.0.pstore" => [10, 0, 0].
    #
    # @param path [String] a store file name or path
    # @return [Array<Integer>, nil] nil when the name carries no version
    def store_version(path)
      version = File.basename(path)[STORE_VERSION_PATTERN, 1]
      version && version.split('.').map(&:to_i)
    end

    # Expands the stored representation into individual characters.
    #
    # Each entry is either a bare Integer code point (a small hack to
    # increase data compression) or a collection of code points.
    #
    #   ranges_to_unicode([65, 97..99]) # => ["A", "a", "b", "c"]
    #
    # @param ranges [Array<Integer, Enumerable<Integer>>] stored entries
    # @return [Array<String>] one single-character string per code point
    def ranges_to_unicode(ranges)
      ranges.flat_map do |range|
        if range.is_a?(Integer)
          [codepoint_to_char(range)]
        else
          range.map { |codepoint| codepoint_to_char(codepoint) }
        end
      end
    end

    # Encodes a single code point as a one-character UTF-8 string.
    #
    # @param codepoint [Integer]
    # @return [String]
    def codepoint_to_char(codepoint)
      [codepoint].pack('U')
    end
  end
end