lib/unibits.rb in unibits-2.0.0 vs lib/unibits.rb in unibits-2.1.0
- old
+ new
@@ -5,21 +5,35 @@
require "paint"
require "unicode/display_width"
require "characteristics"
module Unibits
- SUPPORTED_ENCODINGS = [
- 'UTF-8',
- 'UTF-16LE',
- 'UTF-16BE',
- 'UTF-32LE',
- 'UTF-32BE',
- 'ASCII-8BIT',
- 'US-ASCII',
- /^ISO-8859-/,
- /^Windows-125/,
- ].freeze
+ SUPPORTED_ENCODINGS = Encoding.name_list.grep( # keeps only the encoding names of the running Ruby that match one of the patterns below
+ Regexp.union(
+ /^UTF-8$/,
+ /^UTF-...E$/, # "UTF-" + any three characters + "E", e.g. UTF-16LE or UTF-32BE
+ /^ASCII-8BIT$/,
+ /^US-ASCII$/,
+ /^ISO-8859-/, # patterns without a trailing $ are prefix matches and accept any suffix
+ /^Windows-125/,
+ /^IBM/,
+ /^CP85/,
+ /^mac/, # case-sensitive: only names that start with lowercase "mac"
+ /^TIS-620$/,
+ /^Windows-874$/,
+ /^KOI8/,
+ )
+ ).sort.freeze # result is a sorted, frozen array of encoding name strings
+
+ COLORS = { # color per character category; read by determine_char_color
+ invalid: "#FF0000", # used when char_info.valid? is false
+ unassigned: "#FF5500", # used when char_info.assigned? is false
+ control: "#0000FF", # used when char_info.control? is true
+ blank: "#33AADD", # used when char_info.blank? is true
+ format: "#FF00FF", # used when char_info.format? is true
+ } # NOTE(review): this hash is not frozen, unlike SUPPORTED_ENCODINGS - consider .freeze
+
DEFAULT_TERMINAL_WIDTH = 80
def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil)
if !string || string.empty?
raise ArgumentError, "no data given to unibits"
@@ -67,20 +81,20 @@
char_info = Characteristics.create_for_type(char, type)
current_color = determine_char_color(char_info)
current_encoding_error = nil if char_info.valid?
- char.each_byte.with_index{ |byte, index|
+ char.each_byte.with_index{ |byte, byteindex|
if Paint.unpaint(hex_buffer[-1]).bytesize > cols - 12
cp_buffer << " "
enc_buffer << " "
hex_buffer << " "
bin_buffer << " "
separator << " "
end
- if index == 0
+ if byteindex == 0
if char_info.valid?
codepoint = "U+%04X" % char.ord
else
case encoding_name
when "US-ASCII"
@@ -176,96 +190,30 @@
codepoint = "invalid"
end
end
end
- cp_buffer[-1] << Paint[
- codepoint.ljust(10), current_color, :bold
- ]
+ cp_buffer[-1] << Paint[ codepoint.ljust(10), current_color, :bold ]
symbolified_char = Symbolify.symbolify(char, char_info)
if char_info.unicode?
padding = 10 - Unicode::DisplayWidth.of(symbolified_char, wide_ambiguous ? 2 : 1)
else
padding = 10 - symbolified_char.size
end
- enc_buffer[-1] << Paint[
- symbolified_char, current_color
- ]
+ enc_buffer[-1] << Paint[ symbolified_char, current_color ]
enc_buffer[-1] << " " * padding if padding > 0
else
cp_buffer[-1] << " " * 10
enc_buffer[-1] << " " * 10
end
- hex_buffer[-1] << Paint[
- ("%02X" % byte).ljust(10, " "), current_color
- ]
+ hex_buffer[-1] << Paint[ ("%02X" % byte).ljust(10, " "), current_color ]
- bin_byte_complete = byte.to_s(2).rjust(8, "0")
-
- if !char_info.valid?
- bin_byte_1 = bin_byte_complete
- bin_byte_2 = ""
- else
- case encoding_name
- when 'US-ASCII'
- bin_byte_1 = bin_byte_complete[0...1]
- bin_byte_2 = bin_byte_complete[1...8]
- when 'ASCII-8BIT'
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- when 'UTF-8'
- if index == 0
- if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/
- bin_byte_1 = $1
- bin_byte_2 = $2
- else
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- end
- else
- bin_byte_1 = bin_byte_complete[0...2]
- bin_byte_2 = bin_byte_complete[2...8]
- end
- when 'UTF-16LE'
- if char.ord <= 0xFFFF || index == 0 || index == 2
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- else
- bin_byte_complete =~ /^(11011[01])([01]+)$/
- bin_byte_1 = $1
- bin_byte_2 = $2
- end
- when 'UTF-16BE'
- if char.ord <= 0xFFFF || index == 1 || index == 3
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- else
- bin_byte_complete =~ /^(11011[01])([01]+)$/
- bin_byte_1 = $1
- bin_byte_2 = $2
- end
- when 'UTF-32LE', 'UTF-32BE'
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- when /^(ISO-8859-|Windows-125)/
- bin_byte_1 = ""
- bin_byte_2 = bin_byte_complete
- end
- end
-
- bin_buffer[-1] << Paint[
- bin_byte_1, current_color
- ] unless !bin_byte_1 || bin_byte_1.empty?
-
- bin_buffer[-1] << Paint[
- bin_byte_2, current_color, :underline
- ] unless !bin_byte_2 || bin_byte_2.empty?
-
+ bin_buffer[-1] << highlight_bits(byte, char, char_info, current_color, byteindex)
bin_buffer[-1] << " "
}
}
if type == :unicode
@@ -281,21 +229,80 @@
return DEFAULT_TERMINAL_WIDTH
end
def self.determine_char_color(char_info) # Picks the display color for one character; the first matching check below wins
if !char_info.valid?
- "#FF0000"
+ COLORS[:invalid] # new side of the diff: the literal is replaced by a COLORS lookup
elsif !char_info.assigned?
- "#FF5500"
+ COLORS[:unassigned]
elsif char_info.control?
- "#0000FF"
+ COLORS[:control]
elsif char_info.blank?
- "#33AADD"
+ COLORS[:blank]
+ elsif char_info.format? # branch added on the new side; it is only reached when the checks above did not match
+ COLORS[:format]
else
random_color # fallback for characters that match none of the checks above
end
end
def self.random_color # Returns a six-digit lowercase hex string with no leading "#" (the COLORS values do carry one)
"%.2x%.2x%.2x" % [rand(90) + 60, rand(90) + 60, rand(90) + 60] # each of the three components is an integer in 60..149 (0x3c..0x95)
+ end
+
+ def self.highlight_bits(byte, char, char_info, current_color, byteindex) # Returns the 8 binary digits of byte as a string built from Paint[...] pieces; byteindex is the byte's position within char
+ bin_byte_complete = byte.to_s(2).rjust(8, "0") # binary digits, left-padded with zeros to 8 characters
+
+ if !char_info.valid?
+ bin_byte_1 = bin_byte_complete # invalid character: the whole byte goes into part 1 (painted without :underline)
+ bin_byte_2 = "" # part 2 is the portion painted with :underline; empty here
+ else
+ case char_info.encoding.name
+ when 'US-ASCII'
+ bin_byte_1 = bin_byte_complete[0...1] # first bit plain
+ bin_byte_2 = bin_byte_complete[1...8] # remaining seven bits underlined
+ when 'ASCII-8BIT'
+ bin_byte_1 = ""
+ bin_byte_2 = bin_byte_complete # all eight bits underlined
+ when 'UTF-8'
+ if byteindex == 0
+ if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/ # first byte: split off a leading 0, 110, 1110 or 11110 prefix
+ bin_byte_1 = $1
+ bin_byte_2 = $2
+ else
+ bin_byte_1 = "" # no such prefix: underline the whole byte
+ bin_byte_2 = bin_byte_complete
+ end
+ else
+ bin_byte_1 = bin_byte_complete[0...2] # later bytes: first two bits plain
+ bin_byte_2 = bin_byte_complete[2...8] # last six bits underlined
+ end
+ when 'UTF-16LE'
+ if char.ord <= 0xFFFF || byteindex == 0 || byteindex == 2 # the prefix split below applies only to bytes 1 and 3 of codepoints above 0xFFFF
+ bin_byte_1 = ""
+ bin_byte_2 = bin_byte_complete
+ else
+ bin_byte_complete =~ /^(11011[01])([01]+)$/ # six-bit 11011x prefix vs. the last two bits; $1/$2 are nil if there is no match
+ bin_byte_1 = $1
+ bin_byte_2 = $2
+ end
+ when 'UTF-16BE'
+ if char.ord <= 0xFFFF || byteindex == 1 || byteindex == 3 # same as UTF-16LE, but here the split applies to bytes 0 and 2
+ bin_byte_1 = ""
+ bin_byte_2 = bin_byte_complete
+ else
+ bin_byte_complete =~ /^(11011[01])([01]+)$/ # six-bit 11011x prefix vs. the last two bits; $1/$2 are nil if there is no match
+ bin_byte_1 = $1
+ bin_byte_2 = $2
+ end
+ else
+ bin_byte_1 = "" # every other encoding name: underline the whole byte
+ bin_byte_2 = bin_byte_complete
+ end
+ end
+
+ res = ""
+ res << Paint[ bin_byte_1, current_color ] unless !bin_byte_1 || bin_byte_1.empty? # nil or empty parts are skipped
+ res << Paint[ bin_byte_2, current_color, :underline ] unless !bin_byte_2 || bin_byte_2.empty?
+ res
end
end