Sha256: bb2111b8e6b39987a3014d8978ecc5d5687052b588bb6f1893a41a6d5658aee4
Contents?: true
Size: 1.2 KB
Versions: 3
Compression:
Stored size: 1.2 KB
Contents
module Unicoder
  module Builder
    # Assigns categories to every codepoint using a multi dimensional Array index structure.
    #
    # The index built by this class has two entries:
    #
    # * +:CATEGORIES+     - Array filled through +assign_codepoint+, holding the short
    #   category name for each codepoint (+nil+ instead of "Cn")
    # * +:CATEGORY_NAMES+ - Hash mapping short category names to their long names
    #
    # NOTE(review): +parse_file+, +assign_codepoint+, +compress!+ and
    # +remove_trailing_nils!+ are not defined in this file; presumably they come
    # from the included Builder / MultiDimensionalArrayBuilder modules.
    class Categories
      include Builder
      include MultiDimensionalArrayBuilder

      # Resets @index to its empty starting shape.
      def initialize_index
        @index = {
          CATEGORIES: [],
          CATEGORY_NAMES: {},
        }
        # Not read anywhere in this class; kept in case the mixed-in modules rely on it.
        @range_start = nil
      end

      # Reads the general category data and the category name aliases into @index.
      #
      # @return [Hash] the populated index (+:CATEGORIES+ and +:CATEGORY_NAMES+)
      def parse!
        # Matches "FROM..TO ; Xx ..." (range) as well as "FROM ; Xx ..." (single
        # codepoint). The dots of the range separator are escaped so that only a
        # literal ".." introduces the upper bound.
        parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:\.\.(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line|
          first = line["from"].to_i(16)
          # A line without an upper bound describes exactly one codepoint
          last = line["to"] ? line["to"].to_i(16) : first
          # "Cn" is stored as nil rather than as an explicit category value
          category = line["category"] == "Cn" ? nil : line["category"]

          (first..last).each do |codepoint|
            assign_codepoint(codepoint, category, @index[:CATEGORIES])
          end
        end

        4.times { compress! @index[:CATEGORIES] }
        remove_trailing_nils! @index[:CATEGORIES]

        # Matches "gc ; Xx ; Long_Name ..." alias lines.
        # NOTE(review): in Ruby's regexp syntax `\S{2}?` is an optional `\S{2}`
        # (not a lazy quantifier); left unchanged on purpose.
        parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
          @index[:CATEGORY_NAMES][line["short"]] = line["long"]
        end

        @index
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
unicoder-1.1.1 | lib/unicoder/builders/categories.rb |
unicoder-1.1.0 | lib/unicoder/builders/categories.rb |
unicoder-1.0.0 | lib/unicoder/builders/categories.rb |