lib/unibits/symbolify.rb in unibits-2.0.0 vs lib/unibits/symbolify.rb in unibits-2.1.0
- old
+ new
@@ -1,8 +1,8 @@
module Unibits
module Symbolify
- NO_UTF8_CONVERTER = /^Windows-1258/
+ NO_UTF8_CONVERTER = /^(Windows-1258|IBM864|macCentEuro|macThai)/
ASCII_CHARS = "\x20-\x7E".freeze # 0x20 (space), "-", 0x7E (~); looks like a tr/count-style range spec — confirm against callers
ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze # 0x00 through 0x1F written as a range, plus 0x7F
ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze # U+2400 through U+241F written as a range, plus U+2421; presumably the replacement set paired with ASCII_CONTROL_CODEPOINTS — confirm
TAGS = "\u{E0021}-\u{E007E}".freeze # U+E0021 through U+E007E written as a range
@@ -352,10 +352,31 @@
0xE01ED => "VS254",
0xE01EE => "VS255",
0xE01EF => "VS256",
}.freeze
+ INTERESTING_BYTES_ENCODINGS = { # codepoint => pattern the encoding name must match for the codepoint to get a special label
+ 0xD8 => /^macCroatian/, # encoding names starting with "macCroatian"
+ 0xF0 => /^mac(Iceland|Roman|Turkish)/, # names starting with macIceland, macRoman or macTurkish
+ 0xFD => /^(ISO-8859-8|Windows-(1255|1256))/, # names starting with ISO-8859-8, Windows-1255 or Windows-1256
+ 0xFE => /^(ISO-8859-8|Windows-(1255|1256))/, # same pattern as 0xFD
+ }.freeze
+
+ INTERESTING_BYTES_VALUES = { # codepoint => label substituted for the character when INTERESTING_BYTES_ENCODINGS[codepoint] matches the encoding name
+ 0xD8 => "Logo", # presumably the vendor logo glyph of the Mac encodings — confirm
+ 0xF0 => "Logo", # same label as 0xD8, for a different set of encodings
+ 0xFD => "LRM", # presumably "left-to-right mark" — confirm
+ 0xFE => "RLM", # presumably "right-to-left mark" — confirm
+ }.freeze
+
+ MAC_KEY_SYMBOLS = { # codepoint (0x11..0x14) => key symbol used in place of the generic C0 symbol when the encoding name starts with "mac"
+ 0x11 => "⌘",
+ 0x12 => "⇧",
+ 0x13 => "⌥",
+ 0x14 => "⌃",
+ }.freeze # frozen like the other lookup tables so the constant cannot be mutated at runtime
+
def self.symbolify(char, char_info)
if !char_info.valid?
"�"
else
case char_info
@@ -405,16 +426,22 @@
treat_char_unconverted = false
if char_info.delete?
char = CONTROL_DELETE_SYMBOL
elsif char_info.c0?
+ if ord >= 0x11 && ord <= 0x14 && encoding.name =~ /^mac/
+ char = MAC_KEY_SYMBOLS[ord]
+ else
char = CONTROL_C0_SYMBOLS[ord]
+ end
elsif char_info.c1?
char = CONTROL_C1_NAMES[ord]
elsif no_converter
treat_char_unconverted = true
elsif char_info.blank?
char = "]".encode(encoding) + char + "[".encode(encoding)
+ elsif INTERESTING_BYTES_ENCODINGS[ord] =~ encoding.name
+ char = INTERESTING_BYTES_VALUES[ord]
end
if no_converter && treat_char_unconverted
char.inspect
else