Sha256: 4177b9e4579e1e42c8ceb21055c970572214f4b030b75bc48a5b7b8f89c9e495

Contents?: true

Size: 1.49 KB

Versions: 11

Compression:

Stored size: 1.49 KB

Contents

#!/usr/bin/env ruby

require 'open-uri'
require 'csv'

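# RFC 5892 contextual rules that rely on Virama and Joining_Type character data:
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1 (ZERO WIDTH NON-JOINER)
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2 (ZERO WIDTH JOINER)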

# Both Unicode data files are parsed as semicolon-separated rows; blank lines
# and lines starting with '#' are skipped.
csv_options = { col_sep: ';', skip_blanks: true, skip_lines: /\A#/ }

unicode_data = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
derived_joining_type = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')

# Canonical combining class value that identifies a virama.
# https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
virama_canonical_combining_class = '9'

# UnicodeData.txt rows are read as: code, name, category, canonical combining
# class, ... Keep the code (first column) of every row whose fourth column is
# the virama class.
unicode_rows = CSV.new(unicode_data.read, **csv_options)
virama_rows = unicode_rows.select { |row| row[3] == virama_canonical_combining_class }
virama_codes = virama_rows.map { |row| row[0] }

# DerivedJoiningType.txt rows are read as: code (or code range), joining type.
# The joining type column may carry a trailing '#' comment, which is removed
# before the rows are grouped. The result maps each joining type to the list
# of its whitespace-stripped codes.
# https://www.unicode.org/reports/tr44/#Default_Values
# https://www.unicode.org/reports/tr44/#Derived_Extracted
joining_type_rows = CSV.new(derived_joining_type.read, **csv_options)
rows_by_joining_type = joining_type_rows.group_by { |row| row[1].gsub(/#.+/, '').strip }
codes_by_joining_type = rows_by_joining_type.transform_values do |rows|
  rows.map { |row| row[0].strip }
end

# Builds the source text of a bracketed character class from a list of
# hexadecimal codes.
#
# Every run of hex digits in an entry is wrapped as \u{...}, and a '..' range
# separator is rewritten to '-', so '0622..0625' becomes '\u{0622}-\u{0625}'.
# The converted entries are concatenated and enclosed in square brackets.
def codes_to_character_class(codes)
  members = codes.map { |entry| entry.gsub(/(\h+)/, '\u{\1}').gsub('..', '-') }.join
  "[#{members}]"
end

# Print the generated character classes as Ruby constant assignments.
virama_character_class = codes_to_character_class(virama_codes)
puts "VIRAMA_CHARACTER_CLASS = '#{virama_character_class}'"

# Only these joining types are emitted, in this order; a type missing from the
# parsed data is skipped.
%w[L D T R].each do |joining_type|
  next unless codes_by_joining_type.key?(joining_type)

  character_class = codes_to_character_class(codes_by_joining_type[joining_type])
  puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{character_class}'"
end

Version data entries

11 entries across 11 versions & 1 rubygems

Version Path
json_schemer-2.3.0 bin/hostname_character_classes
json_schemer-2.2.1 bin/hostname_character_classes
json_schemer-2.2.0 bin/hostname_character_classes
json_schemer-2.1.1 bin/hostname_character_classes
json_schemer-2.1.0 bin/hostname_character_classes
json_schemer-2.0.0 bin/hostname_character_classes
json_schemer-1.0.3 bin/hostname_character_classes
json_schemer-1.0.2 bin/hostname_character_classes
json_schemer-1.0.1 bin/hostname_character_classes
json_schemer-1.0.0 bin/hostname_character_classes
json_schemer-0.2.25 bin/hostname_character_classes