lib/unibits.rb in unibits-1.3.0 vs lib/unibits.rb in unibits-2.0.0
- old
+ new
@@ -2,21 +2,23 @@
require_relative "unibits/symbolify"
require "io/console"
require "paint"
require "unicode/display_width"
-require "unicode/categories"
+require "characteristics"
module Unibits
+ # Encoding names declared as supported. Each entry is either an exact
+ # encoding name (String) or a pattern (Regexp) covering a family of names.
+ # The patterns use \A so they only match at the very start of the name.
SUPPORTED_ENCODINGS = [
'UTF-8',
'UTF-16LE',
'UTF-16BE',
'UTF-32LE',
'UTF-32BE',
'ASCII-8BIT',
'US-ASCII',
+ /\AISO-8859-/,
+ /\AWindows-125/,
].freeze
DEFAULT_TERMINAL_WIDTH = 80
def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil)
if !string || string.empty?
@@ -47,47 +49,42 @@
"\n #{valid ? '' : Paint["Invalid ", :bold, :red]}#{Paint[string.encoding.name, :bold]} (#{bytes}/#{codepoints}/#{glyphs}/#{width})"
end
def self.visualize(string, wide_ambiguous: false, width: nil)
cols = width || determine_terminal_cols
+ encoding_name = string.encoding.name
+ type = Characteristics.type_from_encoding_name(encoding_name)
+
cp_buffer = [" "]
enc_buffer = [" "]
hex_buffer = [" "]
bin_buffer = [" "]
separator = [" "]
current_encoding_error = nil
puts
string.each_char{ |char|
- if char.valid_encoding?
- char_valid = true
- current_encoding_error = nil
- if Unicode::Categories.category(char) == "Cn"
- current_color = "#FF5500"
- else
- current_color = random_color
- end
- else
- char_valid = false
- current_color = :red
- end
+ char_info = Characteristics.create_for_type(char, type)
+ current_color = determine_char_color(char_info)
+ current_encoding_error = nil if char_info.valid?
+
char.each_byte.with_index{ |byte, index|
if Paint.unpaint(hex_buffer[-1]).bytesize > cols - 12
cp_buffer << " "
enc_buffer << " "
hex_buffer << " "
bin_buffer << " "
separator << " "
end
if index == 0
- if char_valid
+ if char_info.valid?
codepoint = "U+%04X" % char.ord
else
- case string.encoding.name
+ case encoding_name
when "US-ASCII"
codepoint = "invalid"
when "UTF-8"
# this tries to detect what is wrong with this utf-8 encoded string
# sorry for this mess
@@ -165,11 +162,11 @@
codepoint = "invalid"
end
when 'UTF-16LE', 'UTF-16BE'
if char.bytesize.odd?
codepoint = "incompl."
- elsif char.b[string.encoding.name == 'UTF-16LE' ? 1 : 0].unpack("B*")[0][0, 5] == "11011"
+ elsif char.b[encoding_name == 'UTF-16LE' ? 1 : 0].unpack("B*")[0][0, 5] == "11011"
codepoint = "hlf.srg."
else
codepoint = "invalid"
end
when 'UTF-32LE', 'UTF-32BE'
@@ -183,18 +180,18 @@
cp_buffer[-1] << Paint[
codepoint.ljust(10), current_color, :bold
]
- if char_valid
- symbolified_char = symbolify(char)
+ symbolified_char = Symbolify.symbolify(char, char_info)
+
+ if char_info.unicode?
+ padding = 10 - Unicode::DisplayWidth.of(symbolified_char, wide_ambiguous ? 2 : 1)
else
- symbolified_char = "�"
+ padding = 10 - symbolified_char.size
end
- padding = 10 - Unicode::DisplayWidth.of(symbolified_char, wide_ambiguous ? 2 : 1)
-
enc_buffer[-1] << Paint[
symbolified_char, current_color
]
enc_buffer[-1] << " " * padding if padding > 0
else
@@ -206,15 +203,15 @@
("%02X" % byte).ljust(10, " "), current_color
]
bin_byte_complete = byte.to_s(2).rjust(8, "0")
- if !char_valid
+ if !char_info.valid?
bin_byte_1 = bin_byte_complete
bin_byte_2 = ""
else
- case string.encoding.name
+ case encoding_name
when 'US-ASCII'
bin_byte_1 = bin_byte_complete[0...1]
bin_byte_2 = bin_byte_complete[1...8]
when 'ASCII-8BIT'
bin_byte_1 = ""
@@ -251,10 +248,13 @@
bin_byte_2 = $2
end
when 'UTF-32LE', 'UTF-32BE'
bin_byte_1 = ""
bin_byte_2 = bin_byte_complete
+ when /^(ISO-8859-|Windows-125)/
+ bin_byte_1 = ""
+ bin_byte_2 = bin_byte_complete
end
end
bin_buffer[-1] << Paint[
bin_byte_1, current_color
@@ -266,27 +266,36 @@
bin_buffer[-1] << " "
}
}
- if string.encoding.name[0, 3] == "UTF"
+ if type == :unicode
enc_buffer.zip(cp_buffer, hex_buffer, bin_buffer, separator).flatten.join("\n")
else
enc_buffer.zip(hex_buffer, bin_buffer, separator).flatten.join("\n")
end
end
- def self.random_color
- "%.2x%.2x%.2x" %[rand(90) + 60, rand(90) + 60, rand(90) + 60]
- end
-
- def self.symbolify(char)
- return char.inspect unless char.encoding.name[0, 3] == "UTF"
- Symbolify.symbolify(char).encode('UTF-8')
- end
-
+ # Width of the terminal attached to STDIN in columns, or
+ # DEFAULT_TERMINAL_WIDTH when STDIN is not a terminal (Errno::ENOTTY) or
+ # reports no usable width.
def self.determine_terminal_cols
- STDIN.winsize[1] || DEFAULT_TERMINAL_WIDTH
+ cols = STDIN.winsize[1]
+ # A terminal without a configured size reports 0 columns; 0 is truthy in
+ # Ruby, so `||` alone would let it through and break the line wrapping.
+ cols && cols > 0 ? cols : DEFAULT_TERMINAL_WIDTH
rescue Errno::ENOTTY
return DEFAULT_TERMINAL_WIDTH
+ end
+
+ # Picks the display color for a character from its char_info: invalid,
+ # unassigned, control and blank characters each get a fixed color (checked
+ # in that order); every other character gets a random color.
+ def self.determine_char_color(char_info)
+ return "#FF0000" unless char_info.valid?
+ return "#FF5500" unless char_info.assigned?
+ return "#0000FF" if char_info.control?
+ return "#33AADD" if char_info.blank?
+
+ random_color
+ end
+
+ # Builds a random color as six hex digits (no leading "#"); each of the
+ # three channels is drawn from 60..149.
+ def self.random_color
+ channels = Array.new(3) { rand(90) + 60 }
+ "%.2x%.2x%.2x" % channels
end
end