tools/mkunidata.rb in unicode-0.1.1 vs tools/mkunidata.rb in unicode-0.2.0
- old
+ new
@@ -1,15 +1,15 @@
#! /usr/local/bin/ruby -KU
-if $KCODE != 'UTF8'
- raise "$KCODE must be UTF8"
-end
+#if $KCODE != 'UTF8'
+# raise "$KCODE must be UTF8"
+#end
HEAD=<<EOS
/*
* UnicodeData
- * 1999 by yoshidam
+ * Copyright 1999, 2004 by yoshidam
*
*/
#ifndef _UNIDATA_MAP
#define _UNIDATA_MAP
@@ -23,11 +23,11 @@
const int uppercase;
const int lowercase;
const int titlecase;
};
-const static struct unicode_data unidata[] = {
+static const struct unicode_data unidata[] = {
EOS
TAIL=<<EOS
};
@@ -39,11 +39,11 @@
return [nil, nil]
end
canon = ""
compat = ""
chars = hex.split(" ")
- if chars[0] =~ /^[0-9A-F]{4}$/
+ if chars[0] =~ /^[0-9A-F]{4,6}$/
chars.each do |c|
canon << [c.hex].pack("U")
end
compat = canon
elsif chars[0] =~ /^<.+>$/
@@ -57,11 +57,11 @@
end
[canon, compat]
end
def hex_or_nil(str)
- return "-1" if str.nil?
+ return "-1" if str.nil? || str == ''
return format("0x%04x", str.hex)
end
def printstr(str)
return "NULL" if !str
@@ -79,23 +79,33 @@
## scan Composition Exclusions
exclusion = {}
open(ARGV[1]) do |f|
while l = f.gets
next if l =~ /^\#/ || l =~ /^$/
+ next if l !~ /Full_Composition_Exclusion/
code, = l.split(/\s/)
- code = code.hex
- exclusion[code] = true
+ if code =~ /^[0-9A-F]+$/
+ code = code.hex
+ exclusion[code] = true
+ elsif code =~ /^([0-9A-F]+)\.\.([0-9A-F]+)$/
+# p [$1, $2]
+ scode = $1.hex
+ ecode = $2.hex
+ for code in scode..ecode
+ exclusion[code] = true
+ end
+ end
end
end
## scan UnicodeData
udata = {}
open(ARGV[0]) do |f|
while l = f.gets
l.chomp!
code, charname, gencat, ccclass, bidicat,decomp,
dec, digit, num, mirror, uni1_0, comment, upcase,
- lowcase, titlecase = l.split(";");
+ lowcase, titlecase = l.split(";", 15);
code = code.hex
ccclass = ccclass.to_i
canon, compat = hex2str(decomp)
upcase = hex_or_nil(upcase)
lowcase = hex_or_nil(lowcase)