Sha256: 8c705a9902eacfc375eb59eca838fb0b562ca1c477eaad813e3ebf8423a36dc8

Contents?: true

Size: 1.42 KB

Versions: 3

Compression:

Stored size: 1.42 KB

Contents

require 'pstore'

module RegexpExamples
  # Interface to retrieve the character sets that match a regex named property.
  # E.g. `/\p{Alpha}/`
  # These matching values are stored, compressed, in a PStore. They are specific to
  # the ruby minor version.
  class UnicodeCharRanges
    # These values were generated by: scripts/unicode_lister.rb
    # Note: Only the first 128 results are listed, for performance.
    # Also, some groups seem to have no matches (weird!)
    #
    # Only the "major.minor" part of the ruby version is used (the micro version
    # number is irrelevant). It is extracted with a regexp rather than a fixed
    # three-character slice, so a two-digit minor version is not truncated.
    STORE_FILENAME = "unicode_ranges_#{RUBY_VERSION[/\A\d+\.\d+/]}.pstore"

    attr_reader :range_store

    # @param filename [String] name of the PStore file, resolved relative to
    #   the `db` directory three levels above this file's path.
    def initialize(filename = STORE_FILENAME)
      @range_store = PStore.new(File.expand_path("../../../db/#{filename}", __FILE__))
    end

    # Looks up the stored entry for +key+ inside a read-only PStore transaction
    # and expands it into a list of characters.
    #
    # @param key [Object] the PStore root name to look up
    # @return [Array<String>] one single-character string per code point
    def get(key)
      range_store.transaction(true) do
        ranges_to_unicode(range_store[key])
      end
    end

    alias_method :[], :get

    private

    # Expands the compact stored form into individual characters.
    #
    # Each element of +ranges+ is either a bare Integer (a lone code point) or
    # an enumerable of Integers such as a Range. For example:
    #
    #   ranges_to_unicode([97, 100..102]) # => ["a", "d", "e", "f"]
    #
    # @param ranges [Array<Integer, Enumerable<Integer>>]
    # @return [Array<String>]
    def ranges_to_unicode(ranges)
      ranges.flat_map do |range|
        # Small hack to increase data compression: a lone code point is stored
        # as a bare Integer instead of a one-element range.
        # `Integer` is used because `Fixnum` was removed in ruby 3.2.
        if range.is_a?(Integer)
          [hex_to_unicode(range.to_s(16))]
        else
          range.map { |num| hex_to_unicode(num.to_s(16)) }
        end
      end
    end

    # Converts a hexadecimal code point string (e.g. "61") into the
    # corresponding character (e.g. "a").
    def hex_to_unicode(hex)
      [hex.to_i(16)].pack('U')
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
regexp-examples-1.2.1 lib/regexp-examples/unicode_char_ranges.rb
regexp-examples-1.2.0 lib/regexp-examples/unicode_char_ranges.rb
regexp-examples-1.1.4 lib/regexp-examples/unicode_char_ranges.rb