lib/unibits.rb in unibits-2.1.1 vs lib/unibits.rb in unibits-2.2.0
- old
+ new
@@ -8,10 +8,11 @@
module Unibits
SUPPORTED_ENCODINGS = Encoding.name_list.grep(
Regexp.union(
/^UTF-8$/,
+ /^UTF8-/,
/^UTF-...E$/,
/^ASCII-8BIT$/,
/^US-ASCII$/,
/^ISO-8859-/,
/^Windows-125/,
@@ -28,10 +29,11 @@
invalid: "#FF0000",
unassigned: "#FF5500",
control: "#0000FF",
blank: "#33AADD",
format: "#FF00FF",
+ mark: "#228822",
}
DEFAULT_TERMINAL_WIDTH = 80
def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil)
@@ -98,11 +100,11 @@
codepoint = "U+%04X" % char.ord
else
case encoding_name
when "US-ASCII"
codepoint = "invalid"
- when "UTF-8"
+ when "UTF-8", /^UTF8/
# this tries to detect what is wrong with this utf-8 encoded string
# sorry for this mess
case char.unpack("B*")[0]
when /^110.{5}$/
current_encoding_error = [:nec, 1, 1]
@@ -242,10 +244,12 @@
COLORS[:control]
elsif char_info.blank?
COLORS[:blank]
elsif char_info.format?
COLORS[:format]
+ elsif char_info.unicode? && char_info.category[0] == "M"
+ COLORS[:mark]
else
random_color
end
end
@@ -265,10 +269,10 @@
bin_byte_1 = bin_byte_complete[0...1]
bin_byte_2 = bin_byte_complete[1...8]
when 'ASCII-8BIT'
bin_byte_1 = ""
bin_byte_2 = bin_byte_complete
- when 'UTF-8'
+ when 'UTF-8', /^UTF8/
if byteindex == 0
if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/
bin_byte_1 = $1
bin_byte_2 = $2
else