Sha256: aa04d180ef6312c6c573b79b172fd55a382e1051cede15a50e6433807adc7ce2

Contents?: true

Size: 1.58 KB

Versions: 2

Compression:

Stored size: 1.58 KB

Contents

require 'pstore'

module RegexpExamples
  # Interface to retrieve the character sets that match a regex named property.
  # E.g. `/\p{Alpha}/`
  # These matching values are stored, compressed, in a PStore. They are specific to
  # the ruby minor version.
  class UnicodeCharRanges
    # These values were generated by: scripts/unicode_lister.rb
    # Note: Only the first 128 results are listed, for performance.
    # Also, some groups seem to have no matches (weird!)
    # (Don't care about ruby micro version number)
    #
    # The "major.minor" prefix is extracted with a regex rather than a fixed
    # character slice, so a two-digit minor version is not truncated.
    STORE_FILENAME = "unicode_ranges_#{RUBY_VERSION[/\A\d+\.\d+/]}.pstore"

    attr_reader :range_store

    # Opens the PStore located by #unicode_ranges_file.
    def initialize
      @range_store = PStore.new(unicode_ranges_file)
    end

    # Looks up the entry stored under +key+ and expands it into characters.
    #
    # @param key the PStore root name to read
    # @return [Array<String>] one single-character string per stored code point
    def get(key)
      # `true` opens a read-only transaction.
      range_store.transaction(true) do
        ranges_to_unicode(range_store[key])
      end
    end

    alias_method :[], :get

    private

    # Picks the store file to load: of all `*.pstore` files in the db directory,
    # the one whose path sorts highest without sorting after the path for
    # STORE_FILENAME. Returns nil when no file qualifies.
    def unicode_ranges_file
      db_path = File.join(__dir__, '../../db')
      newest_allowed = File.join(db_path, STORE_FILENAME)
      Dir["#{db_path}/*.pstore"].select { |file| file <= newest_allowed }.max
    end

    # Expands a compressed list of code points into single-character strings.
    # Each entry is either a lone Integer code point or an enumerable (e.g. a
    # Range) of code points.
    #
    # Example:
    #   ranges_to_unicode([65, 97..99]) # => ["A", "a", "b", "c"]
    def ranges_to_unicode(ranges)
      ranges.flat_map do |range|
        # Small hack to increase data compression: lone code points are stored
        # as bare integers. `Integer` is used because `Fixnum` no longer exists
        # on current rubies.
        if range.is_a?(Integer)
          [codepoint_to_unicode(range)]
        else
          range.map { |num| codepoint_to_unicode(num) }
        end
      end
    end

    # Encodes one integer code point as a UTF-8 string.
    def codepoint_to_unicode(codepoint)
      [codepoint].pack('U')
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
regexp-examples-1.3.1 lib/regexp-examples/unicode_char_ranges.rb
regexp-examples-1.3.0 lib/regexp-examples/unicode_char_ranges.rb