lib/unibits/symbolify.rb in unibits-2.0.0 vs lib/unibits/symbolify.rb in unibits-2.1.0

- old
+ new

@@ -1,8 +1,8 @@
 module Unibits
   module Symbolify
-    NO_UTF8_CONVERTER = /^Windows-1258/
+    NO_UTF8_CONVERTER = /^(Windows-1258|IBM864|macCentEuro|macThai)/
     ASCII_CHARS = "\x20-\x7E".freeze
     ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze
     ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze
     TAGS = "\u{E0021}-\u{E007E}".freeze
@@ -352,10 +352,31 @@
       0xE01ED => "VS254",
       0xE01EE => "VS255",
       0xE01EF => "VS256",
     }.freeze
+    INTERESTING_BYTES_ENCODINGS = {
+      0xD8 => /^macCroatian/,
+      0xF0 => /^mac(Iceland|Roman|Turkish)/,
+      0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
+      0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
+    }.freeze
+
+    INTERESTING_BYTES_VALUES = {
+      0xD8 => "Logo",
+      0xF0 => "Logo",
+      0xFD => "LRM",
+      0xFE => "RLM",
+    }.freeze
+
+    MAC_KEY_SYMBOLS = {
+      0x11 => "⌘",
+      0x12 => "⇧",
+      0x13 => "⌥",
+      0x14 => "⌃",
+    }
+
     def self.symbolify(char, char_info)
       if !char_info.valid?
         "�"
       else
         case char_info
@@ -405,16 +426,22 @@
       treat_char_unconverted = false
       if char_info.delete?
         char = CONTROL_DELETE_SYMBOL
       elsif char_info.c0?
+        if ord >= 0x11 && ord <= 0x14 && encoding.name =~ /^mac/
+          char = MAC_KEY_SYMBOLS[ord]
+        else
         char = CONTROL_C0_SYMBOLS[ord]
+        end
       elsif char_info.c1?
         char = CONTROL_C1_NAMES[ord]
       elsif no_converter
         treat_char_unconverted = true
       elsif char_info.blank?
         char = "]".encode(encoding) + char + "[".encode(encoding)
+      elsif INTERESTING_BYTES_ENCODINGS[ord] =~ encoding.name
+        char = INTERESTING_BYTES_VALUES[ord]
       end
       if no_converter && treat_char_unconverted
         char.inspect
       else