lib/unibits/symbolify.rb in unibits-1.3.0 vs lib/unibits/symbolify.rb in unibits-2.0.0

- old
+ new

module Unibits
  # Turns a single character into a short, printable representation
  # ("symbol"): control pictures for C0/DELETE, mnemonic names for C1 and
  # other invisible codepoints, brackets around blanks, and so on.
  #
  # All helpers take the character plus a +char_info+ object. The only
  # methods used on it here are +valid?+, +assigned?+, +delete?+, +c0?+,
  # +c1?+, +blank?+ and +encoding+.
  # NOTE(review): the *Characteristics classes referenced in .symbolify are
  # not defined in this file - presumably they come from a dependency that
  # is loaded elsewhere; confirm.
  module Symbolify
    # Encodings matched by this pattern are never transcoded to UTF-8 by
    # .byte; such characters are shown via String#inspect instead.
    NO_UTF8_CONVERTER = /\AWindows-1258/

    # String#tr style character sets. ASCII_CHARS and TAGS are no longer used
    # inside this module (see TAG_CODEPOINTS) but are kept because other code
    # may still reference them.
    ASCII_CHARS = "\x20-\x7E".freeze
    ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze
    ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze
    TAGS = "\u{E0021}-\u{E007E}".freeze

    # Visible tag characters, and the distance between a tag character and
    # the ASCII character it mirrors (U+E0041 TAG LATIN CAPITAL LETTER A
    # corresponds to U+0041 "A").
    TAG_CODEPOINTS = (0xE0021..0xE007E).freeze
    TAG_OFFSET = 0xE0000

    # Control pictures U+2400..U+241F, indexed by the C0 codepoint they depict.
    CONTROL_C0_SYMBOLS = (0x2400..0x241F).map { |codepoint| [codepoint].pack("U").freeze }.freeze

    # Control picture for DELETE (U+2421).
    CONTROL_DELETE_SYMBOL = "\u{2421}".freeze

    # Mnemonics for the C1 control codes, keyed by codepoint.
    CONTROL_C1_NAMES = {
      0x80 => "PAD",
      0x81 => "HOP",
      0x82 => "BPH",
      0x83 => "NBH",
      0x84 => "IND",
      0x85 => "NEL",
      0x86 => "SSA",
      0x87 => "ESA",
      0x88 => "HTS",
      0x89 => "HTJ",
      0x8A => "VTS",
      0x8B => "PLD",
      0x8C => "PLU",
      0x8D => "RI",
      0x8E => "SS2",
      0x8F => "SS3",
      0x90 => "DCS",
      0x91 => "PU1",
      0x92 => "PU2",
      0x93 => "STS",
      0x94 => "CCH",
      0x95 => "MW",
      0x96 => "SPA",
      0x97 => "EPA",
      0x98 => "SOS",
      0x99 => "SGC",
      0x9A => "SCI",
      0x9B => "CSI",
      0x9C => "ST",
      0x9D => "OSC",
      0x9E => "PM",
      0x9F => "APC",
    }.freeze

    # Names for invisible or otherwise noteworthy codepoints, keyed by
    # codepoint. The variation selectors form two contiguous runs
    # (VS1..VS16 at U+FE00.., VS17..VS256 at U+E0100..), so they are
    # generated instead of being listed one by one.
    INTERESTING_CODEPOINTS = [
      {
        0x200E => "LRM",
        0x200F => "RLM",
        0x202A => "LRE",
        0x202B => "RLE",
        0x202C => "PDF",
        0x202D => "LRO",
        0x202E => "RLO",
        0x2066 => "LRI",
        0x2067 => "RLI",
        0x2068 => "FSI",
        0x2069 => "PDI",
      },
      (0xFE00..0xFE0F).each_with_index.map { |codepoint, index| [codepoint, "VS#{index + 1}"] }.to_h,
      {
        0xE0001 => "LANG TAG",
        0xE0020 => "TAG ␠",
        0xE007F => "TAG ␡",
      },
      (0xE0100..0xE01EF).each_with_index.map { |codepoint, index| [codepoint, "VS#{index + 17}"] }.to_h,
    ].reduce(:merge).freeze

    # Dispatches to the helper matching the class of +char_info+.
    #
    # @param char [String] a single character
    # @param char_info [#valid?] characteristics object describing +char+
    # @return [String] U+FFFD when +char_info+ reports the character as
    #   invalid, otherwise the result of .unicode, .byte, .ascii or .binary
    def self.symbolify(char, char_info)
      return "�" unless char_info.valid?

      case char_info
      when UnicodeCharacteristics
        Symbolify.unicode(char, char_info)
      when ByteCharacteristics
        Symbolify.byte(char, char_info)
      when AsciiCharacteristics
        Symbolify.ascii(char, char_info)
      else
        Symbolify.binary(char)
      end
    end

    # Symbol for a character in a Unicode encoding.
    #
    # @param char [String] a single character encoded in +char_info.encoding+
    # @param char_info [#assigned?, #delete?, #c0?, #c1?, #blank?, #encoding]
    # @return [String] UTF-8 string; "n/a" for unassigned codepoints
    def self.unicode(char, char_info)
      return "n/a" unless char_info.assigned?

      ord = char.ord
      encoding = char_info.encoding

      symbol =
        if char_info.delete?
          CONTROL_DELETE_SYMBOL
        elsif char_info.c0?
          CONTROL_C0_SYMBOLS[ord]
        elsif char_info.c1?
          CONTROL_C1_NAMES[ord]
        elsif char_info.blank?
          # Brackets make otherwise invisible blanks visible.
          "]".encode(encoding) + char + "[".encode(encoding)
        elsif TAG_CODEPOINTS.cover?(ord)
          # Map the tag onto the ASCII character it mirrors. The previous
          # String#tr based translation paired a 94 character set with a 95
          # character set and therefore shifted every tag by one.
          "TAG #{(ord - TAG_OFFSET).chr}"
        else
          INTERESTING_CODEPOINTS[ord] || char
        end

      symbol.encode("UTF-8")
    end

    # Symbol for a character in a single-byte (non-Unicode) encoding.
    #
    # @param char [String] a single character encoded in +char_info.encoding+
    # @param char_info [#assigned?, #delete?, #c0?, #c1?, #blank?, #encoding]
    # @return [String] UTF-8 string, "n/a" for unassigned bytes, or
    #   +char.inspect+ for non-control characters of encodings matching
    #   NO_UTF8_CONVERTER
    def self.byte(char, char_info)
      return "n/a" unless char_info.assigned?

      ord = char.ord
      encoding = char_info.encoding
      no_converter = !!(NO_UTF8_CONVERTER =~ encoding.name)

      symbol =
        if char_info.delete?
          CONTROL_DELETE_SYMBOL
        elsif char_info.c0?
          CONTROL_C0_SYMBOLS[ord]
        elsif char_info.c1?
          CONTROL_C1_NAMES[ord]
        elsif no_converter
          # Not transcodable to UTF-8: show the escaped form instead.
          return char.inspect
        elsif char_info.blank?
          "]".encode(encoding) + char + "[".encode(encoding)
        else
          char
        end

      symbol.encode("UTF-8")
    end

    # Symbol for a US-ASCII character.
    #
    # @param char [String] a single character
    # @param char_info [#delete?, #c0?, #blank?]
    # @return [String] control picture, bracketed blank, or +char+ itself;
    #   control pictures are returned as copies so callers cannot modify the
    #   shared constants
    def self.ascii(char, char_info)
      if char_info.delete?
        CONTROL_DELETE_SYMBOL.dup
      elsif char_info.c0?
        CONTROL_C0_SYMBOLS[char.ord].dup
      elsif char_info.blank?
        "]" + char + "["
      else
        char
      end
    end

    # Symbol for a byte of binary data: its String#inspect form.
    #
    # @param char [String]
    # @return [String]
    def self.binary(char)
      char.inspect
    end
  end
end