Sha256: 7c2f760d4c7031e4db317f69fd2fb6b209bb4a16c816cc326527d12ef01e7d3e

Contents?: true

Size: 1.31 KB

Versions: 1

Compression:

Stored size: 1.31 KB

Contents

module Unicoder
  module Builder
    # Assigns categories to every codepoint using a multi dimensional Array index structure
    #
    # The index built by this class contains three entries:
    #
    # * CATEGORIES     - nested Array filled through assign_codepoint and then compressed
    # * CATEGORY_NAMES - Hash mapping a short category alias to its long name
    # * OFFSETS        - the values 0x10000, 0x1000, 0x100, 0x10
    #                    (NOTE(review): presumably the span covered by each nesting
    #                    level of CATEGORIES - confirm against MultiDimensionalArrayBuilder)
    class Categories
      include Builder
      include MultiDimensionalArrayBuilder

      # Creates the empty index that parse! fills in.
      def initialize_index
        @index = {
          CATEGORIES: [],
          CATEGORY_NAMES: {},
          OFFSETS: [
            0x10000,
            0x1000,
            0x100,
            0x10
          ],
        }
        # Not read anywhere in this class; kept because the included modules may rely on it.
        @range_start = nil
      end

      # Reads the general category data and the category name aliases into the index.
      #
      # Returns the populated @index Hash.
      def parse!
        # Each data line is either "XXXX ; Gc" (single codepoint) or "XXXX..YYYY ; Gc"
        # (inclusive range). The range separator is two literal dots, so it is escaped
        # in the pattern: an unescaped ".." would accept any two characters.
        parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:\.\.(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line|
          first = line["from"].to_i(16)
          # A line without a "to" part describes a single codepoint, i.e. a range of one
          last = line["to"] ? line["to"].to_i(16) : first
          # "Cn" (unassigned) is stored as nil instead of as a category string
          category = line["category"] == "Cn" ? nil : line["category"]

          (first..last).each{ |codepoint|
            assign_codepoint(codepoint, category, @index[:CATEGORIES])
          }
        end

        # NOTE(review): four compress! passes, which equals the number of OFFSETS
        # entries - presumably one pass per nesting level; confirm in the mixin.
        4.times{ compress! @index[:CATEGORIES] }
        remove_trailing_nils! @index[:CATEGORIES]

        # Collect the long name for every two-character general category alias
        # (lines starting with "gc ;"); single-letter aliases do not match this pattern.
        parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
          @index[:CATEGORY_NAMES][line["short"]] = line["long"]
        end

        @index
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
unicoder-1.3.0 lib/unicoder/builders/categories.rb