Sha256: 4a57e4b759769827221c5dc1953a5a5917cf69ac6ed9ae8755c04bc3fc6320e0

Contents?: true

Size: 1.51 KB

Versions: 1

Compression:

Stored size: 1.51 KB

Contents

require 'open-uri'

## Location of the EastAsianWidth data file that this script downloads and converts.
URL = 'https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth-14.0.0d2.txt'
## Maps each width type name to a numeric id; output_line assigns ids in order of first use.
TYPES = Hash.new

## Print one entry of the DATA array for a run of code points.
##
## curr_start - hex string, first code point of the run
## curr_end   - hex string, last code point of the run, or nil for a single code point
## curr_type  - width type name; it is given the next free id in TYPES on first use
##
## Writes "[id,start,length-1]," for a range and "[id,start]," for a single
## code point, with the numbers in decimal.
def output_line(curr_start, curr_end, curr_type)
  type_id = (TYPES[curr_type] ||= TYPES.size)
  first = curr_start.to_i(16)
  entry = [type_id, first]
  ## A range additionally stores its distance from the first code point.
  entry << (curr_end.to_i(16) - first) if curr_end
  print "[#{entry.join(',')}],"
end

## Print the opening of the generated file.
##
## line - the first line of the downloaded data file. A leading
##        "# EastAsianWidth-..." is rewritten to start with "##"; any other
##        text is printed unchanged.
##
## After that line come the module openers and the start of the DATA array
## literal, which is left open for the entries that follow.
def output_header(line)
  title = line.sub(/^# (EastAsianWidth-.*)$/) { "## #{Regexp.last_match(1)}" }
  print "#{title}module Unicode\n  module Eaw\n    DATA=["
end

## Download the data file and write the generated Ruby source to stdout.
content = URI.open(URL).read

## The pending run of adjacent code points that share one type. It is only
## printed once a line arrives that cannot extend it.
run_start = run_end = run_type = nil

content.each_line.with_index do |line, n|
  if n.zero?
    ## The first line of the file becomes the header of the output.
    output_header(line)
    next
  end

  ## Skip anything that is not a "XXXX;Type" or "XXXX..YYYY;Type" entry.
  next unless line =~ /^([0-9A-Z]+)(?:\.\.([0-9A-Z]+))?;(\w+)/

  first, last, type = Regexp.last_match.captures

  ## Last code point covered by the pending run (a single code point has no end).
  run_last = run_end || run_start
  if type == run_type && run_last.to_i(16) + 1 == first.to_i(16)
    ## Same type and directly adjacent: extend the pending run.
    run_end = last || first
  else
    ## Flush the pending run, if there is one, and start a new one.
    output_line(run_start, run_end, run_type) if run_start
    run_start, run_end, run_type = first, last, type
  end
end

## Flush the final run, then close the DATA array.
output_line(run_start, run_end, run_type)
puts "]"

## List the type names as symbols, in the order their ids were assigned.
type_symbols = TYPES.keys.map { |name| ":#{name}" }.join(",")
puts "    TYPES=[#{type_symbols}]"
puts "  end", "end"

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
unicode-eaw-2.1.0 tools/generate.rb