Sha256: f968d5c283d78e3ac98ba18aa0edd05623939dd7fcfe84e684bfb413be8cda02

Contents?: true

Size: 1.38 KB

Versions: 1

Compression:

Stored size: 1.38 KB

Contents

module Unicoder
  module Builder
    # Assigns categories to every codepoint using a multi dimensional Array index structure.
    #
    # The index built by this class has two entries:
    # * +:CATEGORIES+     - filled through +assign_codepoint+ and then passed to +compress!+
    # * +:CATEGORY_NAMES+ - Hash mapping a two-character short category name to its long name
    class Categories
      include Builder
      include MultiDimensionalArrayBuilder

      # Creates an empty index and clears the range-tracking state used by #parse!.
      def initialize_index
        @index = {
          CATEGORIES: [],
          CATEGORY_NAMES: {},
        }
        # Codepoint of the most recent "<..., First>" line whose matching
        # "<..., Last>" line has not been seen yet; nil when no range is open.
        @range_start = nil
      end

      # Reads the :unicode_data and :property_value_aliases files (via +parse_file+)
      # and fills the index.
      #
      # Lines whose name field is a "<..., First>" / "<..., Last>" pair describe a
      # whole range of codepoints sharing one category; every codepoint in the
      # range is assigned individually.
      #
      # Returns the index Hash.
      # Raises ArgumentError when a range marker is neither a "First" line nor a
      # "Last" line that follows an open "First" line.
      def parse!
        parse_file :unicode_data, :line, regex: /^(?<codepoint>.+?);(?<range><(?!control).+>)?.*?;(?<category>.+?);.*$/ do |line|
          codepoint = line["codepoint"].to_i(16)
          range     = line["range"]

          if range.nil?
            # Ordinary single-codepoint line
            assign_codepoint(codepoint, line["category"], @index[:CATEGORIES])
          elsif range.include?("First")
            @range_start = codepoint
          elsif range.include?("Last") && @range_start
            (@range_start..codepoint).each do |codepoint_in_range|
              assign_codepoint(codepoint_in_range, line["category"], @index[:CATEGORIES])
            end
            # Close the range so that a later "Last" line without its own "First"
            # is reported as inconsistent instead of reusing this stale start.
            @range_start = nil
          else
            raise ArgumentError, "inconsistent range found in data, don't know what to do"
          end
        end

        # NOTE(review): compress! comes from an included module not visible here;
        # presumably each pass shrinks the structure further - confirm why 4 passes.
        4.times{ compress! @index[:CATEGORIES] }

        # Only "gc" (General_Category) aliases with a two-character short name are matched.
        parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
          @index[:CATEGORY_NAMES][line["short"]] = line["long"]
        end

        @index
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
unicoder-0.1.0 lib/unicoder/builders/categories.rb