tools/mkunidata.rb in unicode-0.1.1 vs tools/mkunidata.rb in unicode-0.2.0

- old
+ new

@@ -1,15 +1,15 @@ #! /usr/local/bin/ruby -KU -if $KCODE != 'UTF8' - raise "$KCODE must be UTF8" -end +#if $KCODE != 'UTF8' +# raise "$KCODE must be UTF8" +#end HEAD=<<EOS /* * UnicodeData - * 1999 by yoshidam + * Copyright 1999, 2004 by yoshidam * */ #ifndef _UNIDATA_MAP #define _UNIDATA_MAP @@ -23,11 +23,11 @@ const int uppercase; const int lowercase; const int titlecase; }; -const static struct unicode_data unidata[] = { +static const struct unicode_data unidata[] = { EOS TAIL=<<EOS }; @@ -39,11 +39,11 @@ return [nil, nil] end canon = "" compat = "" chars = hex.split(" ") - if chars[0] =~ /^[0-9A-F]{4}$/ + if chars[0] =~ /^[0-9A-F]{4,6}$/ chars.each do |c| canon << [c.hex].pack("U") end compat = canon elsif chars[0] =~ /^<.+>$/ @@ -57,11 +57,11 @@ end [canon, compat] end def hex_or_nil(str) - return "-1" if str.nil? + return "-1" if str.nil? || str == '' return format("0x%04x", str.hex) end def printstr(str) return "NULL" if !str @@ -79,23 +79,33 @@ ## scan Composition Exclusions exclusion = {} open(ARGV[1]) do |f| while l = f.gets next if l =~ /^\#/ || l =~ /^$/ + next if l !~ /Full_Composition_Exclusion/ code, = l.split(/\s/) - code = code.hex - exclusion[code] = true + if code =~ /^[0-9A-F]+$/ + code = code.hex + exclusion[code] = true + elsif code =~ /^([0-9A-F]+)\.\.([0-9A-F]+)$/ +# p [$1, $2] + scode = $1.hex + ecode = $2.hex + for code in scode..ecode + exclusion[code] = true + end + end end end ## scan UnicodeData udata = {} open(ARGV[0]) do |f| while l = f.gets l.chomp! code, charname, gencat, ccclass, bidicat,decomp, dec, digit, num, mirror, uni1_0, comment, upcase, - lowcase, titlecase = l.split(";"); + lowcase, titlecase = l.split(";", 15); code = code.hex ccclass = ccclass.to_i canon, compat = hex2str(decomp) upcase = hex_or_nil(upcase) lowcase = hex_or_nil(lowcase)