lib/unibits/symbolify.rb in unibits-1.3.0 vs lib/unibits/symbolify.rb in unibits-2.0.0
- old
+ new
@@ -1,351 +1,442 @@
-require "unicode/categories"
-
module Unibits
module Symbolify
+ NO_UTF8_CONVERTER = /^Windows-1258/
+ ASCII_CHARS = "\x20-\x7E".freeze
ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze
ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze
- ASCII_CHARS = "\x20-\x7E".freeze
- TAG_START = "\u{E0001}".freeze
- TAG_START_SYMBOL = "LANG TAG".freeze
- TAG_SPACE = "\u{E0020}".freeze
- TAG_SPACE_SYMBOL = "TAG ␠".freeze
TAGS = "\u{E0021}-\u{E007E}".freeze
- TAG_DELETE = "\u{E007F}".freeze
- TAG_DELETE_SYMBOL = "TAG ␡".freeze
+
+ CONTROL_C0_SYMBOLS = [
+ "␀",
+ "␁",
+ "␂",
+ "␃",
+ "␄",
+ "␅",
+ "␆",
+ "␇",
+ "␈",
+ "␉",
+ "␊",
+ "␋",
+ "␌",
+ "␍",
+ "␎",
+ "␏",
+ "␐",
+ "␑",
+ "␒",
+ "␓",
+ "␔",
+ "␕",
+ "␖",
+ "␗",
+ "␘",
+ "␙",
+ "␚",
+ "␛",
+ "␜",
+ "␝",
+ "␞",
+ "␟",
+ ]
+
+ CONTROL_DELETE_SYMBOL = "␡"
+
+ CONTROL_C1_NAMES = {
+ 0x80 => "PAD",
+ 0x81 => "HOP",
+ 0x82 => "BPH",
+ 0x83 => "NBH",
+ 0x84 => "IND",
+ 0x85 => "NEL",
+ 0x86 => "SSA",
+ 0x87 => "ESA",
+ 0x88 => "HTS",
+ 0x89 => "HTJ",
+ 0x8A => "VTS",
+ 0x8B => "PLD",
+ 0x8C => "PLU",
+ 0x8D => "RI",
+ 0x8E => "SS2",
+ 0x8F => "SS3",
+ 0x90 => "DCS",
+ 0x91 => "PU1",
+ 0x92 => "PU2",
+ 0x93 => "STS",
+ 0x94 => "CCH",
+ 0x95 => "MW",
+ 0x96 => "SPA",
+ 0x97 => "EPA",
+ 0x98 => "SOS",
+ 0x99 => "SGC",
+ 0x9A => "SCI",
+ 0x9B => "CSI",
+ 0x9C => "ST",
+ 0x9D => "OSC",
+ 0x9E => "PM",
+ 0x9F => "APC",
+ }
+
INTERESTING_CODEPOINTS = {
- "\u{0080}" => "PAD",
- "\u{0081}" => "HOP",
- "\u{0082}" => "BPH",
- "\u{0083}" => "NBH",
- "\u{0084}" => "IND",
- "\u{0085}" => "NEL",
- "\u{0086}" => "SSA",
- "\u{0087}" => "ESA",
- "\u{0088}" => "HTS",
- "\u{0089}" => "HTJ",
- "\u{008A}" => "VTS",
- "\u{008B}" => "PLD",
- "\u{008C}" => "PLU",
- "\u{008D}" => "RI",
- "\u{008E}" => "SS2",
- "\u{008F}" => "SS3",
- "\u{0090}" => "DCS",
- "\u{0091}" => "PU1",
- "\u{0092}" => "PU2",
- "\u{0093}" => "STS",
- "\u{0094}" => "CCH",
- "\u{0095}" => "MW",
- "\u{0096}" => "SPA",
- "\u{0097}" => "EPA",
- "\u{0098}" => "SOS",
- "\u{0099}" => "SGC",
- "\u{009A}" => "SCI",
- "\u{009B}" => "CSI",
- "\u{009C}" => "ST",
- "\u{009D}" => "OSC",
- "\u{009E}" => "PM",
- "\u{009F}" => "APC",
+ 0x200E => "LRM",
+ 0x200F => "RLM",
+ 0x202A => "LRE",
+ 0x202B => "RLE",
+ 0x202C => "PDF",
+ 0x202D => "LRO",
+ 0x202E => "RLO",
+ 0x2066 => "LRI",
+ 0x2067 => "RLI",
+ 0x2068 => "FSI",
+ 0x2069 => "PDI",
- "\u{200E}" => "LRM",
- "\u{200F}" => "RLM",
- "\u{202A}" => "LRE",
- "\u{202B}" => "RLE",
- "\u{202C}" => "PDF",
- "\u{202D}" => "LRO",
- "\u{202E}" => "RLO",
- "\u{2066}" => "LRI",
- "\u{2067}" => "RLI",
- "\u{2068}" => "FSI",
- "\u{2069}" => "PDI",
+ 0xFE00 => "VS1",
+ 0xFE01 => "VS2",
+ 0xFE02 => "VS3",
+ 0xFE03 => "VS4",
+ 0xFE04 => "VS5",
+ 0xFE05 => "VS6",
+ 0xFE06 => "VS7",
+ 0xFE07 => "VS8",
+ 0xFE08 => "VS9",
+ 0xFE09 => "VS10",
+ 0xFE0A => "VS11",
+ 0xFE0B => "VS12",
+ 0xFE0C => "VS13",
+ 0xFE0D => "VS14",
+ 0xFE0E => "VS15",
+ 0xFE0F => "VS16",
- "\u{FE00}" => "VS1",
- "\u{FE01}" => "VS2",
- "\u{FE02}" => "VS3",
- "\u{FE03}" => "VS4",
- "\u{FE04}" => "VS5",
- "\u{FE05}" => "VS6",
- "\u{FE06}" => "VS7",
- "\u{FE07}" => "VS8",
- "\u{FE08}" => "VS9",
- "\u{FE09}" => "VS10",
- "\u{FE0A}" => "VS11",
- "\u{FE0B}" => "VS12",
- "\u{FE0C}" => "VS13",
- "\u{FE0D}" => "VS14",
- "\u{FE0E}" => "VS15",
- "\u{FE0F}" => "VS16",
+ 0xE0001 => "LANG TAG",
+ 0xE0020 => "TAG ␠",
+ 0xE007F => "TAG ␡",
- "\u{E0100}" => "VS17",
- "\u{E0101}" => "VS18",
- "\u{E0102}" => "VS19",
- "\u{E0103}" => "VS20",
- "\u{E0104}" => "VS21",
- "\u{E0105}" => "VS22",
- "\u{E0106}" => "VS23",
- "\u{E0107}" => "VS24",
- "\u{E0108}" => "VS25",
- "\u{E0109}" => "VS26",
- "\u{E010A}" => "VS27",
- "\u{E010B}" => "VS28",
- "\u{E010C}" => "VS29",
- "\u{E010D}" => "VS30",
- "\u{E010E}" => "VS31",
- "\u{E010F}" => "VS32",
- "\u{E0110}" => "VS33",
- "\u{E0111}" => "VS34",
- "\u{E0112}" => "VS35",
- "\u{E0113}" => "VS36",
- "\u{E0114}" => "VS37",
- "\u{E0115}" => "VS38",
- "\u{E0116}" => "VS39",
- "\u{E0117}" => "VS40",
- "\u{E0118}" => "VS41",
- "\u{E0119}" => "VS42",
- "\u{E011A}" => "VS43",
- "\u{E011B}" => "VS44",
- "\u{E011C}" => "VS45",
- "\u{E011D}" => "VS46",
- "\u{E011E}" => "VS47",
- "\u{E011F}" => "VS48",
- "\u{E0120}" => "VS49",
- "\u{E0121}" => "VS50",
- "\u{E0122}" => "VS51",
- "\u{E0123}" => "VS52",
- "\u{E0124}" => "VS53",
- "\u{E0125}" => "VS54",
- "\u{E0126}" => "VS55",
- "\u{E0127}" => "VS56",
- "\u{E0128}" => "VS57",
- "\u{E0129}" => "VS58",
- "\u{E012A}" => "VS59",
- "\u{E012B}" => "VS60",
- "\u{E012C}" => "VS61",
- "\u{E012D}" => "VS62",
- "\u{E012E}" => "VS63",
- "\u{E012F}" => "VS64",
- "\u{E0130}" => "VS65",
- "\u{E0131}" => "VS66",
- "\u{E0132}" => "VS67",
- "\u{E0133}" => "VS68",
- "\u{E0134}" => "VS69",
- "\u{E0135}" => "VS70",
- "\u{E0136}" => "VS71",
- "\u{E0137}" => "VS72",
- "\u{E0138}" => "VS73",
- "\u{E0139}" => "VS74",
- "\u{E013A}" => "VS75",
- "\u{E013B}" => "VS76",
- "\u{E013C}" => "VS77",
- "\u{E013D}" => "VS78",
- "\u{E013E}" => "VS79",
- "\u{E013F}" => "VS80",
- "\u{E0140}" => "VS81",
- "\u{E0141}" => "VS82",
- "\u{E0142}" => "VS83",
- "\u{E0143}" => "VS84",
- "\u{E0144}" => "VS85",
- "\u{E0145}" => "VS86",
- "\u{E0146}" => "VS87",
- "\u{E0147}" => "VS88",
- "\u{E0148}" => "VS89",
- "\u{E0149}" => "VS90",
- "\u{E014A}" => "VS91",
- "\u{E014B}" => "VS92",
- "\u{E014C}" => "VS93",
- "\u{E014D}" => "VS94",
- "\u{E014E}" => "VS95",
- "\u{E014F}" => "VS96",
- "\u{E0150}" => "VS97",
- "\u{E0151}" => "VS98",
- "\u{E0152}" => "VS99",
- "\u{E0153}" => "VS100",
- "\u{E0154}" => "VS101",
- "\u{E0155}" => "VS102",
- "\u{E0156}" => "VS103",
- "\u{E0157}" => "VS104",
- "\u{E0158}" => "VS105",
- "\u{E0159}" => "VS106",
- "\u{E015A}" => "VS107",
- "\u{E015B}" => "VS108",
- "\u{E015C}" => "VS109",
- "\u{E015D}" => "VS110",
- "\u{E015E}" => "VS111",
- "\u{E015F}" => "VS112",
- "\u{E0160}" => "VS113",
- "\u{E0161}" => "VS114",
- "\u{E0162}" => "VS115",
- "\u{E0163}" => "VS116",
- "\u{E0164}" => "VS117",
- "\u{E0165}" => "VS118",
- "\u{E0166}" => "VS119",
- "\u{E0167}" => "VS120",
- "\u{E0168}" => "VS121",
- "\u{E0169}" => "VS122",
- "\u{E016A}" => "VS123",
- "\u{E016B}" => "VS124",
- "\u{E016C}" => "VS125",
- "\u{E016D}" => "VS126",
- "\u{E016E}" => "VS127",
- "\u{E016F}" => "VS128",
- "\u{E0170}" => "VS129",
- "\u{E0171}" => "VS130",
- "\u{E0172}" => "VS131",
- "\u{E0173}" => "VS132",
- "\u{E0174}" => "VS133",
- "\u{E0175}" => "VS134",
- "\u{E0176}" => "VS135",
- "\u{E0177}" => "VS136",
- "\u{E0178}" => "VS137",
- "\u{E0179}" => "VS138",
- "\u{E017A}" => "VS139",
- "\u{E017B}" => "VS140",
- "\u{E017C}" => "VS141",
- "\u{E017D}" => "VS142",
- "\u{E017E}" => "VS143",
- "\u{E017F}" => "VS144",
- "\u{E0180}" => "VS145",
- "\u{E0181}" => "VS146",
- "\u{E0182}" => "VS147",
- "\u{E0183}" => "VS148",
- "\u{E0184}" => "VS149",
- "\u{E0185}" => "VS150",
- "\u{E0186}" => "VS151",
- "\u{E0187}" => "VS152",
- "\u{E0188}" => "VS153",
- "\u{E0189}" => "VS154",
- "\u{E018A}" => "VS155",
- "\u{E018B}" => "VS156",
- "\u{E018C}" => "VS157",
- "\u{E018D}" => "VS158",
- "\u{E018E}" => "VS159",
- "\u{E018F}" => "VS160",
- "\u{E0190}" => "VS161",
- "\u{E0191}" => "VS162",
- "\u{E0192}" => "VS163",
- "\u{E0193}" => "VS164",
- "\u{E0194}" => "VS165",
- "\u{E0195}" => "VS166",
- "\u{E0196}" => "VS167",
- "\u{E0197}" => "VS168",
- "\u{E0198}" => "VS169",
- "\u{E0199}" => "VS170",
- "\u{E019A}" => "VS171",
- "\u{E019B}" => "VS172",
- "\u{E019C}" => "VS173",
- "\u{E019D}" => "VS174",
- "\u{E019E}" => "VS175",
- "\u{E019F}" => "VS176",
- "\u{E01A0}" => "VS177",
- "\u{E01A1}" => "VS178",
- "\u{E01A2}" => "VS179",
- "\u{E01A3}" => "VS180",
- "\u{E01A4}" => "VS181",
- "\u{E01A5}" => "VS182",
- "\u{E01A6}" => "VS183",
- "\u{E01A7}" => "VS184",
- "\u{E01A8}" => "VS185",
- "\u{E01A9}" => "VS186",
- "\u{E01AA}" => "VS187",
- "\u{E01AB}" => "VS188",
- "\u{E01AC}" => "VS189",
- "\u{E01AD}" => "VS190",
- "\u{E01AE}" => "VS191",
- "\u{E01AF}" => "VS192",
- "\u{E01B0}" => "VS193",
- "\u{E01B1}" => "VS194",
- "\u{E01B2}" => "VS195",
- "\u{E01B3}" => "VS196",
- "\u{E01B4}" => "VS197",
- "\u{E01B5}" => "VS198",
- "\u{E01B6}" => "VS199",
- "\u{E01B7}" => "VS200",
- "\u{E01B8}" => "VS201",
- "\u{E01B9}" => "VS202",
- "\u{E01BA}" => "VS203",
- "\u{E01BB}" => "VS204",
- "\u{E01BC}" => "VS205",
- "\u{E01BD}" => "VS206",
- "\u{E01BE}" => "VS207",
- "\u{E01BF}" => "VS208",
- "\u{E01C0}" => "VS209",
- "\u{E01C1}" => "VS210",
- "\u{E01C2}" => "VS211",
- "\u{E01C3}" => "VS212",
- "\u{E01C4}" => "VS213",
- "\u{E01C5}" => "VS214",
- "\u{E01C6}" => "VS215",
- "\u{E01C7}" => "VS216",
- "\u{E01C8}" => "VS217",
- "\u{E01C9}" => "VS218",
- "\u{E01CA}" => "VS219",
- "\u{E01CB}" => "VS220",
- "\u{E01CC}" => "VS221",
- "\u{E01CD}" => "VS222",
- "\u{E01CE}" => "VS223",
- "\u{E01CF}" => "VS224",
- "\u{E01D0}" => "VS225",
- "\u{E01D1}" => "VS226",
- "\u{E01D2}" => "VS227",
- "\u{E01D3}" => "VS228",
- "\u{E01D4}" => "VS229",
- "\u{E01D5}" => "VS230",
- "\u{E01D6}" => "VS231",
- "\u{E01D7}" => "VS232",
- "\u{E01D8}" => "VS233",
- "\u{E01D9}" => "VS234",
- "\u{E01DA}" => "VS235",
- "\u{E01DB}" => "VS236",
- "\u{E01DC}" => "VS237",
- "\u{E01DD}" => "VS238",
- "\u{E01DE}" => "VS239",
- "\u{E01DF}" => "VS240",
- "\u{E01E0}" => "VS241",
- "\u{E01E1}" => "VS242",
- "\u{E01E2}" => "VS243",
- "\u{E01E3}" => "VS244",
- "\u{E01E4}" => "VS245",
- "\u{E01E5}" => "VS246",
- "\u{E01E6}" => "VS247",
- "\u{E01E7}" => "VS248",
- "\u{E01E8}" => "VS249",
- "\u{E01E9}" => "VS250",
- "\u{E01EA}" => "VS251",
- "\u{E01EB}" => "VS252",
- "\u{E01EC}" => "VS253",
- "\u{E01ED}" => "VS254",
- "\u{E01EE}" => "VS255",
- "\u{E01EF}" => "VS256",
+ 0xE0100 => "VS17",
+ 0xE0101 => "VS18",
+ 0xE0102 => "VS19",
+ 0xE0103 => "VS20",
+ 0xE0104 => "VS21",
+ 0xE0105 => "VS22",
+ 0xE0106 => "VS23",
+ 0xE0107 => "VS24",
+ 0xE0108 => "VS25",
+ 0xE0109 => "VS26",
+ 0xE010A => "VS27",
+ 0xE010B => "VS28",
+ 0xE010C => "VS29",
+ 0xE010D => "VS30",
+ 0xE010E => "VS31",
+ 0xE010F => "VS32",
+ 0xE0110 => "VS33",
+ 0xE0111 => "VS34",
+ 0xE0112 => "VS35",
+ 0xE0113 => "VS36",
+ 0xE0114 => "VS37",
+ 0xE0115 => "VS38",
+ 0xE0116 => "VS39",
+ 0xE0117 => "VS40",
+ 0xE0118 => "VS41",
+ 0xE0119 => "VS42",
+ 0xE011A => "VS43",
+ 0xE011B => "VS44",
+ 0xE011C => "VS45",
+ 0xE011D => "VS46",
+ 0xE011E => "VS47",
+ 0xE011F => "VS48",
+ 0xE0120 => "VS49",
+ 0xE0121 => "VS50",
+ 0xE0122 => "VS51",
+ 0xE0123 => "VS52",
+ 0xE0124 => "VS53",
+ 0xE0125 => "VS54",
+ 0xE0126 => "VS55",
+ 0xE0127 => "VS56",
+ 0xE0128 => "VS57",
+ 0xE0129 => "VS58",
+ 0xE012A => "VS59",
+ 0xE012B => "VS60",
+ 0xE012C => "VS61",
+ 0xE012D => "VS62",
+ 0xE012E => "VS63",
+ 0xE012F => "VS64",
+ 0xE0130 => "VS65",
+ 0xE0131 => "VS66",
+ 0xE0132 => "VS67",
+ 0xE0133 => "VS68",
+ 0xE0134 => "VS69",
+ 0xE0135 => "VS70",
+ 0xE0136 => "VS71",
+ 0xE0137 => "VS72",
+ 0xE0138 => "VS73",
+ 0xE0139 => "VS74",
+ 0xE013A => "VS75",
+ 0xE013B => "VS76",
+ 0xE013C => "VS77",
+ 0xE013D => "VS78",
+ 0xE013E => "VS79",
+ 0xE013F => "VS80",
+ 0xE0140 => "VS81",
+ 0xE0141 => "VS82",
+ 0xE0142 => "VS83",
+ 0xE0143 => "VS84",
+ 0xE0144 => "VS85",
+ 0xE0145 => "VS86",
+ 0xE0146 => "VS87",
+ 0xE0147 => "VS88",
+ 0xE0148 => "VS89",
+ 0xE0149 => "VS90",
+ 0xE014A => "VS91",
+ 0xE014B => "VS92",
+ 0xE014C => "VS93",
+ 0xE014D => "VS94",
+ 0xE014E => "VS95",
+ 0xE014F => "VS96",
+ 0xE0150 => "VS97",
+ 0xE0151 => "VS98",
+ 0xE0152 => "VS99",
+ 0xE0153 => "VS100",
+ 0xE0154 => "VS101",
+ 0xE0155 => "VS102",
+ 0xE0156 => "VS103",
+ 0xE0157 => "VS104",
+ 0xE0158 => "VS105",
+ 0xE0159 => "VS106",
+ 0xE015A => "VS107",
+ 0xE015B => "VS108",
+ 0xE015C => "VS109",
+ 0xE015D => "VS110",
+ 0xE015E => "VS111",
+ 0xE015F => "VS112",
+ 0xE0160 => "VS113",
+ 0xE0161 => "VS114",
+ 0xE0162 => "VS115",
+ 0xE0163 => "VS116",
+ 0xE0164 => "VS117",
+ 0xE0165 => "VS118",
+ 0xE0166 => "VS119",
+ 0xE0167 => "VS120",
+ 0xE0168 => "VS121",
+ 0xE0169 => "VS122",
+ 0xE016A => "VS123",
+ 0xE016B => "VS124",
+ 0xE016C => "VS125",
+ 0xE016D => "VS126",
+ 0xE016E => "VS127",
+ 0xE016F => "VS128",
+ 0xE0170 => "VS129",
+ 0xE0171 => "VS130",
+ 0xE0172 => "VS131",
+ 0xE0173 => "VS132",
+ 0xE0174 => "VS133",
+ 0xE0175 => "VS134",
+ 0xE0176 => "VS135",
+ 0xE0177 => "VS136",
+ 0xE0178 => "VS137",
+ 0xE0179 => "VS138",
+ 0xE017A => "VS139",
+ 0xE017B => "VS140",
+ 0xE017C => "VS141",
+ 0xE017D => "VS142",
+ 0xE017E => "VS143",
+ 0xE017F => "VS144",
+ 0xE0180 => "VS145",
+ 0xE0181 => "VS146",
+ 0xE0182 => "VS147",
+ 0xE0183 => "VS148",
+ 0xE0184 => "VS149",
+ 0xE0185 => "VS150",
+ 0xE0186 => "VS151",
+ 0xE0187 => "VS152",
+ 0xE0188 => "VS153",
+ 0xE0189 => "VS154",
+ 0xE018A => "VS155",
+ 0xE018B => "VS156",
+ 0xE018C => "VS157",
+ 0xE018D => "VS158",
+ 0xE018E => "VS159",
+ 0xE018F => "VS160",
+ 0xE0190 => "VS161",
+ 0xE0191 => "VS162",
+ 0xE0192 => "VS163",
+ 0xE0193 => "VS164",
+ 0xE0194 => "VS165",
+ 0xE0195 => "VS166",
+ 0xE0196 => "VS167",
+ 0xE0197 => "VS168",
+ 0xE0198 => "VS169",
+ 0xE0199 => "VS170",
+ 0xE019A => "VS171",
+ 0xE019B => "VS172",
+ 0xE019C => "VS173",
+ 0xE019D => "VS174",
+ 0xE019E => "VS175",
+ 0xE019F => "VS176",
+ 0xE01A0 => "VS177",
+ 0xE01A1 => "VS178",
+ 0xE01A2 => "VS179",
+ 0xE01A3 => "VS180",
+ 0xE01A4 => "VS181",
+ 0xE01A5 => "VS182",
+ 0xE01A6 => "VS183",
+ 0xE01A7 => "VS184",
+ 0xE01A8 => "VS185",
+ 0xE01A9 => "VS186",
+ 0xE01AA => "VS187",
+ 0xE01AB => "VS188",
+ 0xE01AC => "VS189",
+ 0xE01AD => "VS190",
+ 0xE01AE => "VS191",
+ 0xE01AF => "VS192",
+ 0xE01B0 => "VS193",
+ 0xE01B1 => "VS194",
+ 0xE01B2 => "VS195",
+ 0xE01B3 => "VS196",
+ 0xE01B4 => "VS197",
+ 0xE01B5 => "VS198",
+ 0xE01B6 => "VS199",
+ 0xE01B7 => "VS200",
+ 0xE01B8 => "VS201",
+ 0xE01B9 => "VS202",
+ 0xE01BA => "VS203",
+ 0xE01BB => "VS204",
+ 0xE01BC => "VS205",
+ 0xE01BD => "VS206",
+ 0xE01BE => "VS207",
+ 0xE01BF => "VS208",
+ 0xE01C0 => "VS209",
+ 0xE01C1 => "VS210",
+ 0xE01C2 => "VS211",
+ 0xE01C3 => "VS212",
+ 0xE01C4 => "VS213",
+ 0xE01C5 => "VS214",
+ 0xE01C6 => "VS215",
+ 0xE01C7 => "VS216",
+ 0xE01C8 => "VS217",
+ 0xE01C9 => "VS218",
+ 0xE01CA => "VS219",
+ 0xE01CB => "VS220",
+ 0xE01CC => "VS221",
+ 0xE01CD => "VS222",
+ 0xE01CE => "VS223",
+ 0xE01CF => "VS224",
+ 0xE01D0 => "VS225",
+ 0xE01D1 => "VS226",
+ 0xE01D2 => "VS227",
+ 0xE01D3 => "VS228",
+ 0xE01D4 => "VS229",
+ 0xE01D5 => "VS230",
+ 0xE01D6 => "VS231",
+ 0xE01D7 => "VS232",
+ 0xE01D8 => "VS233",
+ 0xE01D9 => "VS234",
+ 0xE01DA => "VS235",
+ 0xE01DB => "VS236",
+ 0xE01DC => "VS237",
+ 0xE01DD => "VS238",
+ 0xE01DE => "VS239",
+ 0xE01DF => "VS240",
+ 0xE01E0 => "VS241",
+ 0xE01E1 => "VS242",
+ 0xE01E2 => "VS243",
+ 0xE01E3 => "VS244",
+ 0xE01E4 => "VS245",
+ 0xE01E5 => "VS246",
+ 0xE01E6 => "VS247",
+ 0xE01E7 => "VS248",
+ 0xE01E8 => "VS249",
+ 0xE01E9 => "VS250",
+ 0xE01EA => "VS251",
+ 0xE01EB => "VS252",
+ 0xE01EC => "VS253",
+ 0xE01ED => "VS254",
+ 0xE01EE => "VS255",
+ 0xE01EF => "VS256",
}.freeze
- COULD_BE_WHITESPACE = '[\p{Space}⠀𝅙]'.freeze
- def self.symbolify(char, encoding = char.encoding)
- return "n/a" if Unicode::Categories.category(char) == "Cn"
+ def self.symbolify(char, char_info)
+ if !char_info.valid?
+ "�"
+ else
+ case char_info
+ when UnicodeCharacteristics
+ Symbolify.unicode(char, char_info)
+ when ByteCharacteristics
+ Symbolify.byte(char, char_info)
+ when AsciiCharacteristics
+ Symbolify.ascii(char, char_info)
+ else
+ Symbolify.binary(char)
+ end
+ end
+ end
+ def self.unicode(char, char_info)
+ return "n/a" if !char_info.assigned?
+
char = char.dup
+ ord = char.ord
+ encoding = char_info.encoding
- char.tr!(
- ASCII_CONTROL_CODEPOINTS.encode(encoding),
- ASCII_CONTROL_SYMBOLS.encode(encoding)
- )
- char.gsub!(
- Regexp.compile(COULD_BE_WHITESPACE.encode(encoding)),
- ']\0['.encode(encoding)
- )
+ if char_info.delete?
+ char = CONTROL_DELETE_SYMBOL
+ elsif char_info.c0?
+ char = CONTROL_C0_SYMBOLS[ord]
+ elsif char_info.c1?
+ char = CONTROL_C1_NAMES[ord]
+ elsif char_info.blank?
+ char = "]".encode(encoding) + char + "[".encode(encoding)
+ elsif ord > 917536 && ord < 917631
+ char = "TAG ".encode(encoding) +
+ char.tr(TAGS.encode(encoding), ASCII_CHARS.encode(encoding))
+ else
+ char = INTERESTING_CODEPOINTS[char.ord] || char
+ end
- INTERESTING_CODEPOINTS.each{ |cp, desc|
- char.gsub! Regexp.compile(cp.encode(encoding)), desc.encode(encoding)
- }
- char.gsub! TAG_START.encode(encoding), TAG_START_SYMBOL.encode(encoding)
- char.gsub! TAG_SPACE.encode(encoding), TAG_SPACE_SYMBOL.encode(encoding)
- char.gsub! TAG_DELETE.encode(encoding), TAG_DELETE_SYMBOL.encode(encoding)
+ char.encode("UTF-8")
+ end
+ def self.byte(char, char_info)
+ return "n/a" if !char_info.assigned?
+
ord = char.ord
- if ord > 917536 && ord < 917631
- char.tr!(TAGS.encode(encoding), ASCII_CHARS.encode(encoding))
- char = "TAG ".encode(encoding) + char
+ encoding = char_info.encoding
+ no_converter = !!(NO_UTF8_CONVERTER =~ encoding.name)
+ treat_char_unconverted = false
+
+ if char_info.delete?
+ char = CONTROL_DELETE_SYMBOL
+ elsif char_info.c0?
+ char = CONTROL_C0_SYMBOLS[ord]
+ elsif char_info.c1?
+ char = CONTROL_C1_NAMES[ord]
+ elsif no_converter
+ treat_char_unconverted = true
+ elsif char_info.blank?
+ char = "]".encode(encoding) + char + "[".encode(encoding)
end
+ if no_converter && treat_char_unconverted
+ char.inspect
+ else
+ char.encode("UTF-8")
+ end
+ end
+
+ def self.ascii(char, char_info)
+ if char_info.delete?
+ char = CONTROL_DELETE_SYMBOL
+ elsif char_info.c0?
+ char = CONTROL_C0_SYMBOLS[char.ord]
+ elsif char_info.blank?
+ char = "]" + char + "["
+ end
+
char
+ end
+
+ def self.binary(char)
+ char.inspect
end
end
end