lib/unibits.rb in unibits-2.1.1 vs lib/unibits.rb in unibits-2.2.0

- old
+ new

@@ -8,10 +8,11 @@ module Unibits SUPPORTED_ENCODINGS = Encoding.name_list.grep( Regexp.union( /^UTF-8$/, + /^UTF8-/, /^UTF-...E$/, /^ASCII-8BIT$/, /^US-ASCII$/, /^ISO-8859-/, /^Windows-125/, @@ -28,10 +29,11 @@ invalid: "#FF0000", unassigned: "#FF5500", control: "#0000FF", blank: "#33AADD", format: "#FF00FF", + mark: "#228822", } DEFAULT_TERMINAL_WIDTH = 80 def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil) @@ -98,11 +100,11 @@ codepoint = "U+%04X" % char.ord else case encoding_name when "US-ASCII" codepoint = "invalid" - when "UTF-8" + when "UTF-8", /^UTF8/ # this tries to detect what is wrong with this utf-8 encoded string # sorry for this mess case char.unpack("B*")[0] when /^110.{5}$/ current_encoding_error = [:nec, 1, 1] @@ -242,10 +244,12 @@ COLORS[:control] elsif char_info.blank? COLORS[:blank] elsif char_info.format? COLORS[:format] + elsif char_info.unicode? && char_info.category[0] == "M" + COLORS[:mark] else random_color end end @@ -265,10 +269,10 @@ bin_byte_1 = bin_byte_complete[0...1] bin_byte_2 = bin_byte_complete[1...8] when 'ASCII-8BIT' bin_byte_1 = "" bin_byte_2 = bin_byte_complete - when 'UTF-8' + when 'UTF-8', /^UTF8/ if byteindex == 0 if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/ bin_byte_1 = $1 bin_byte_2 = $2 else