lib/unicoder/builders/confusable.rb in unicoder-1.1.1 vs lib/unicoder/builders/confusable.rb in unicoder-1.3.0

- old
+ new

@@ -1,20 +1,41 @@ module Unicoder module Builder class Confusable include Builder + def initialize_index + @index = { + CONFUSABLE: {}, + IGNORABLE: [], + } + end + def parse! - parse_file :confusables, :line, regex: /^(?<from>\S+)\s+;\s+(?<to>.+)\s+;.*$/ do |line| + parse_file :confusables, :line, regex: /^(?<from>\S+)\s+;\s+(?<to>.+?)\s+;.*$/ do |line| source = line["from"].to_i(16) if line["to"].include?(" ") replace_with = line["to"].split(" ").map{ |codepoint| + cp = codepoint.to_i(16) + option =~ /charvalues/ ? [cp].pack("U") : cp + } + else + cp = line["to"].to_i(16) + replace_with = option =~ /charvalues/ ? [cp].pack("U") : cp + end + assign :CONFUSABLE, source, replace_with + end + + parse_file :core_properties, :line, begin: /^# Derived Property: Default_Ignorable_Code_Point$/, end: /^# ================================================$/, regex: /^(?<codepoints>\S+)\s+; Default_Ignorable_Code_Point.*$/ do |line| + if line["codepoints"]['..'] + single_or_multiple_codepoints = line["codepoints"].split('..').map{ |codepoint| codepoint.to_i(16) } else - replace_with = line["to"].to_i(16) + single_or_multiple_codepoints = line["codepoints"].to_i(16) end - @index[source] = replace_with + + @index[:IGNORABLE] << single_or_multiple_codepoints end end end end end