Sha256: 901bf10f7b677686fc3ee1059ee9c87f764731ebea9986bf230a75ff708ac28a
Contents?: true
Size: 1.9 KB
Versions: 4
Compression:
Stored size: 1.9 KB
Contents
module Unicoder module Builder class Types include Builder include MultiDimensionalArrayBuilder NONCHARACTERS = [ *0xFDD0..0xFDEF, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF, ] def initialize_index @index = { TYPES: [], TYPE_NAMES: %w[ Graphic Format Control Private-use Surrogate Noncharacter Reserved ], OFFSETS: [ 0x10000, 0x1000, 0x100, 0x10 ], } end def parse! parse_file :general_categories, :line, regex: /^(?<from>[^. ]+)(?:..(?<to>\S+))?\s*; (?<category>\S+).*$/ do |line| if line["to"] codepoints = Range.new(line["from"].to_i(16), line["to"].to_i(16)) else codepoints = [line["from"].to_i(16)] end codepoints.each{ |codepoint| case line["category"] when "Cf", "Zl", "Zp" type = 1 when "Cc" type = 2 when "Co" type = 3 when "Cs" type = 4 when "Cn" if NONCHARACTERS.include?(codepoint) type = 5 else type = 6 end end assign :TYPES, codepoint, type } end 4.times{ compress! @index[:TYPES] } end end end end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
unicoder-1.3.0 | lib/unicoder/builders/types.rb |
unicoder-1.1.1 | lib/unicoder/builders/types.rb |
unicoder-1.1.0 | lib/unicoder/builders/types.rb |
unicoder-1.0.0 | lib/unicoder/builders/types.rb |