lib/unibits/symbolify.rb in unibits-1.1.0 vs lib/unibits/symbolify.rb in unibits-1.2.0
- old
+ new
@@ -1,5 +1,7 @@
+require "unicode/categories"
+
module Unibits
module Symbolify
# C0 control characters (0x00..0x1F) plus DEL (0x7F), written as a range
# expression; symbolify passes this as the "from" set of char.tr!.
ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze
# "To" set of the same tr! call: U+2400..U+241F followed by U+2421. tr maps
# the two sets position by position, so 0x00..0x1F land on U+2400..U+241F and
# 0x7F lands on U+2421 (these are the Unicode "Control Pictures" symbols).
ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze
# Range expression spanning space (0x20) through tilde (0x7E).
# NOTE(review): not referenced in the visible part of this file — presumably
# used elsewhere; confirm against callers.
ASCII_CHARS = "\x20-\x7E".freeze
@@ -72,24 +74,22 @@
"\u{FE0D}" => "VS14",
"\u{FE0E}" => "VS15",
"\u{FE0F}" => "VS16",
}.freeze
# Source text of a regexp character class: anything matching \p{Space}, plus
# one extra literal character that renders blank (looks like U+2800 BRAILLE
# PATTERN BLANK — TODO confirm the exact codepoint). symbolify re-encodes this
# string, compiles it, and rewrites each match as "]<match>[" — presumably so
# that blank-looking characters remain visible in the output.
COULD_BE_WHITESPACE = '[\p{Space}⠀]'.freeze
- # UNASSIGNED = '\p{Cn}'.freeze
def self.symbolify(char, encoding = char.encoding)
+ return "n/a" if Unicode::Categories.category(char) == "Cn"
+
char.tr!(
ASCII_CONTROL_CODEPOINTS.encode(encoding),
ASCII_CONTROL_SYMBOLS.encode(encoding)
)
char.gsub!(
Regexp.compile(COULD_BE_WHITESPACE.encode(encoding)),
']\0['.encode(encoding)
)
- # char.gsub!(
- # Regexp.compile(UNASSIGNED.encode(encoding)),
- # 'n/a'.encode(encoding)
- # )
+
INTERESTING_CODEPOINTS.each{ |cp, desc|
char.gsub! Regexp.compile(cp.encode(encoding)), desc.encode(encoding)
}
char.gsub! TAG_START.encode(encoding), TAG_START_SYMBOL.encode(encoding)
char.gsub! TAG_SPACE.encode(encoding), TAG_SPACE_SYMBOL.encode(encoding)