# frozen_string_literal: true

require 'pstore'

module RegexpExamples
  # Interface to retrieve the character sets that match a regex named property.
  # E.g. `/\p{Alpha}/`
  # These matching values are stored, compressed, in a PStore. They are specific to
  # the ruby minor version.
  class UnicodeCharRanges
    # These values were generated by: scripts/unicode_lister.rb
    # Note: Only the first 128 results are listed, for performance.
    # Also, some groups seem to have no matches (weird!)

    # (Don't care about ruby micro version number)
    STORE_FILENAME = "unicode_ranges_#{RUBY_VERSION[0..2]}.pstore"

    attr_reader :range_store

    # Opens the PStore file chosen by #unicode_ranges_file.
    def initialize
      @range_store = PStore.new(unicode_ranges_file)
    end

    # Looks up the stored ranges for +key+ inside a read-only transaction and
    # expands them into an array of single-character strings.
    #
    # @param key [Object] the PStore root name to read
    # @return [Array<String>] one string per stored codepoint
    def get(key)
      range_store.transaction(true) do
        ranges_to_unicode(range_store[key])
      end
    end
    alias_method :[], :get

    private

    # Picks the store file to load: of all `*.pstore` files in the db directory,
    # the one whose path sorts last while still being <= the path for the
    # running ruby's STORE_FILENAME (plain string comparison).
    # Returns nil when no file qualifies.
    def unicode_ranges_file
      db_path = File.join(__dir__, '../../db')
      newest_allowed = "#{db_path}/#{STORE_FILENAME}"
      Dir["#{db_path}/*.pstore"].sort.select { |file| file <= newest_allowed }.last
    end

    # Expands a compressed list of codepoints into the characters themselves.
    # Each entry is either a single Integer codepoint or an enumerable of
    # codepoints (e.g. a Range).
    #
    # Example:
    #   ranges_to_unicode([65, 97..99]) # => ["A", "a", "b", "c"]
    #
    # @param ranges [Array<Integer, Enumerable<Integer>>]
    # @return [Array<String>]
    def ranges_to_unicode(ranges)
      ranges.flat_map do |entry|
        # Bare integers are a small hack to increase data compression.
        # (Integer, not Fixnum: Fixnum no longer exists on ruby >= 3.2.)
        codepoints = entry.is_a?(Integer) ? [entry] : entry
        codepoints.map { |codepoint| [codepoint].pack('U') }
      end
    end
  end
end