Sha256: bb2111b8e6b39987a3014d8978ecc5d5687052b588bb6f1893a41a6d5658aee4

Contents?: true

Size: 1.2 KB

Versions: 3

Compression:

Stored size: 1.2 KB

Contents

module Unicoder
  module Builder
    # Assigns categories to every codepoint using a multi dimensional Array index structure
    #
    # The index built by this class has two entries:
    # - CATEGORIES: the structure filled via assign_codepoint, one category
    #   value per codepoint ("Cn" is passed on as nil)
    # - CATEGORY_NAMES: Hash mapping a short category name to its long name
    class Categories
      include Builder
      include MultiDimensionalArrayBuilder

      # Sets up the empty index that parse! fills.
      def initialize_index
        @index = {
          CATEGORIES: [],
          CATEGORY_NAMES: {},
        }
        @range_start = nil
      end

      # Reads the general category data and the category name aliases
      # into @index.
      #
      # Returns the populated @index Hash.
      def parse!
        # The regex accepts "FROM..TO ; Category" (range) as well as
        # "FROM ; Category" (single codepoint). The range separator is matched
        # as two literal dots; unescaped dots would match any two characters.
        parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:\.\.(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line|
          # "Cn" is handed to assign_codepoint as nil instead of a category name
          category = line["category"] == "Cn" ? nil : line["category"]

          first = line["from"].to_i(16)
          # Without a "to" capture the line describes exactly one codepoint
          last = line["to"] ? line["to"].to_i(16) : first

          (first..last).each{ |codepoint|
            assign_codepoint(codepoint, category, @index[:CATEGORIES])
          }
        end

        # Shrink the structure: four compress! passes, then drop trailing nils
        4.times{ compress! @index[:CATEGORIES] }
        remove_trailing_nils! @index[:CATEGORIES]

        # Only "gc" lines are matched; each maps a short name to its long name
        parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
          @index[:CATEGORY_NAMES][line["short"]] = line["long"]
        end

        @index
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
unicoder-1.1.1 lib/unicoder/builders/categories.rb
unicoder-1.1.0 lib/unicoder/builders/categories.rb
unicoder-1.0.0 lib/unicoder/builders/categories.rb