Sha256: a4b0f3f29293b172b6fb533371b02a8f86d8c6960ab935f4a0301b061e5f5fe1
Contents?: true
Size: 1.49 KB
Versions: 2
Compression:
Stored size: 1.49 KB
Contents
require 'open-uri'

# Generator script: downloads the Unicode EastAsianWidth.txt data file and
# prints Ruby source for a Unicode::Eaw module (DATA and TYPES constants)
# to standard output.

# Location of the Unicode data file that is downloaded and converted.
URL = 'https://www.unicode.org/Public/12.1.0/ucd/EastAsianWidth.txt'

# Maps each width type name seen so far to a small integer id, assigned in
# order of first appearance. Hash insertion order later yields the TYPES list.
TYPES = {}

# Prints one DATA entry for a run of code points that share a width type.
#
# curr_start - first code point of the run, as a hex string
# curr_end   - last code point of the run as a hex string, or nil when the
#              run consists of a single code point
# curr_type  - width type name; registered in TYPES on first use
#
# The entry is "[type_id,start,length_minus_one]," for a range and
# "[type_id,start]," for a single code point.
def output_line(curr_start, curr_end, curr_type)
  type_id = TYPES[curr_type] ||= TYPES.size
  first = curr_start.to_i(16)
  if curr_end
    printf("[%d,%d,%d],", type_id, first, curr_end.to_i(16) - first)
  else
    printf("[%d,%d],", type_id, first)
  end
end

# Prints the file header: the first line of the data file (its
# "# EastAsianWidth-..." comment rewritten to start with "##"), the opening
# of the Unicode::Eaw module, and the start of the DATA array literal.
def output_header(line)
  print line.sub(/^# (EastAsianWidth-.*)$/, '## \1')
  print <<~EOB
    module Unicode
    module Eaw
  EOB
  print " DATA=["
end

# URI.open is used instead of Kernel#open: open-uri's patching of Kernel#open
# for URLs was deprecated in Ruby 2.7 and removed in Ruby 3.0. The block form
# also closes the downloaded stream once it has been read.
content = URI.open(URL, &:read)

prev_start = prev_end = prev_type = nil
content.each_line.with_index do |line, n|
  if n == 0
    output_header(line)
  elsif line =~ /^([0-9A-Z]+)(?:\.\.([0-9A-Z]+))?;(\w+)/
    curr_start, curr_end, curr_type = $1, $2, $3
    # The type comparison comes first so the hex arithmetic is skipped while
    # no run is pending (prev_start is still nil on the first data line).
    if curr_type == prev_type &&
       (prev_end || prev_start).to_i(16) + 1 == curr_start.to_i(16)
      ## same type and directly adjacent: extend the pending run
      prev_end = curr_end || curr_start
    else
      ## emit the pending run (if any) and start a new one
      output_line(prev_start, prev_end, prev_type) if prev_start
      prev_start, prev_end, prev_type = curr_start, curr_end, curr_type
    end
  end
end

# Fail with a clear message instead of an obscure error from nil.to_i(16)
# when the download contained no recognizable data lines.
raise "no East Asian Width records found in #{URL}" unless prev_start

## output last run
output_line(prev_start, prev_end, prev_type)
puts "]"

types_str = TYPES.keys.map { |ch| ":#{ch}" }.join(",")
puts " TYPES=[#{types_str}]"
puts <<~EOB
  end
  end
EOB
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
unicode-eaw-2.0.0 | tools/generate.rb |
unicode-eaw-1.0.0 | tools/generate.rb |