lib/unicoder/builders/categories.rb in unicoder-0.1.0 vs lib/unicoder/builders/categories.rb in unicoder-1.0.0
- old
+ new
@@ -12,26 +12,21 @@
}
@range_start = nil
end
def parse!
- parse_file :unicode_data, :line, regex: /^(?<codepoint>.+?);(?<range><(?!control).+>)?.*?;(?<category>.+?);.*$/ do |line|
- if line["range"]
- if line["range"] =~ /First/
- @range_start = line["codepoint"].to_i(16)
- elsif line["range"] =~ /Last/ && @range_start
- (@range_start..line["codepoint"].to_i(16)).each{ |codepoint|
- assign_codepoint(codepoint, line["category"], @index[:CATEGORIES])
- }
- else
- raise ArgumentError, "inconsistent range found in data, don't know what to do"
- end
+ parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:..(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line|
+ if line["to"]
+ (line["from"].to_i(16)..line["to"].to_i(16)).each{ |codepoint|
+ assign_codepoint(codepoint, line["category"] == "Cn" ? nil : line["category"], @index[:CATEGORIES])
+ }
else
- assign_codepoint(line["codepoint"].to_i(16), line["category"], @index[:CATEGORIES])
+ assign_codepoint(line["from"].to_i(16), line["category"] == "Cn" ? nil : line["category"], @index[:CATEGORIES])
end
end
4.times{ compress! @index[:CATEGORIES] }
+ remove_trailing_nils! @index[:CATEGORIES]
parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
@index[:CATEGORY_NAMES][line["short"]] = line["long"]
end