lib/unibits.rb in unibits-1.3.0 vs lib/unibits.rb in unibits-2.0.0

- old
+ new

@@ -2,21 +2,23 @@ require_relative "unibits/symbolify" require "io/console" require "paint" require "unicode/display_width" -require "unicode/categories" +require "characteristics" module Unibits SUPPORTED_ENCODINGS = [ 'UTF-8', 'UTF-16LE', 'UTF-16BE', 'UTF-32LE', 'UTF-32BE', 'ASCII-8BIT', 'US-ASCII', + /^ISO-8859-/, + /^Windows-125/, ].freeze DEFAULT_TERMINAL_WIDTH = 80 def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil) if !string || string.empty? @@ -47,47 +49,42 @@ "\n #{valid ? '' : Paint["Invalid ", :bold, :red]}#{Paint[string.encoding.name, :bold]} (#{bytes}/#{codepoints}/#{glyphs}/#{width})" end def self.visualize(string, wide_ambiguous: false, width: nil) cols = width || determine_terminal_cols + encoding_name = string.encoding.name + type = Characteristics.type_from_encoding_name(encoding_name) + cp_buffer = [" "] enc_buffer = [" "] hex_buffer = [" "] bin_buffer = [" "] separator = [" "] current_encoding_error = nil puts string.each_char{ |char| - if char.valid_encoding? - char_valid = true - current_encoding_error = nil - if Unicode::Categories.category(char) == "Cn" - current_color = "#FF5500" - else - current_color = random_color - end - else - char_valid = false - current_color = :red - end + char_info = Characteristics.create_for_type(char, type) + current_color = determine_char_color(char_info) + current_encoding_error = nil if char_info.valid? + char.each_byte.with_index{ |byte, index| if Paint.unpaint(hex_buffer[-1]).bytesize > cols - 12 cp_buffer << " " enc_buffer << " " hex_buffer << " " bin_buffer << " " separator << " " end if index == 0 - if char_valid + if char_info.valid? codepoint = "U+%04X" % char.ord else - case string.encoding.name + case encoding_name when "US-ASCII" codepoint = "invalid" when "UTF-8" # this tries to detect what is wrong with this utf-8 encoded string # sorry for this mess @@ -165,11 +162,11 @@ codepoint = "invalid" end when 'UTF-16LE', 'UTF-16BE' if char.bytesize.odd? codepoint = "incompl." - elsif char.b[string.encoding.name == 'UTF-16LE' ? 1 : 0].unpack("B*")[0][0, 5] == "11011" + elsif char.b[encoding_name == 'UTF-16LE' ? 1 : 0].unpack("B*")[0][0, 5] == "11011" codepoint = "hlf.srg." else codepoint = "invalid" end when 'UTF-32LE', 'UTF-32BE' @@ -183,18 +180,18 @@ cp_buffer[-1] << Paint[ codepoint.ljust(10), current_color, :bold ] - if char_valid - symbolified_char = symbolify(char) + symbolified_char = Symbolify.symbolify(char, char_info) + + if char_info.unicode? + padding = 10 - Unicode::DisplayWidth.of(symbolified_char, wide_ambiguous ? 2 : 1) else - symbolified_char = "�" + padding = 10 - symbolified_char.size end - padding = 10 - Unicode::DisplayWidth.of(symbolified_char, wide_ambiguous ? 2 : 1) - enc_buffer[-1] << Paint[ symbolified_char, current_color ] enc_buffer[-1] << " " * padding if padding > 0 else @@ -206,15 +203,15 @@ ("%02X" % byte).ljust(10, " "), current_color ] bin_byte_complete = byte.to_s(2).rjust(8, "0") - if !char_valid + if !char_info.valid? bin_byte_1 = bin_byte_complete bin_byte_2 = "" else - case string.encoding.name + case encoding_name when 'US-ASCII' bin_byte_1 = bin_byte_complete[0...1] bin_byte_2 = bin_byte_complete[1...8] when 'ASCII-8BIT' bin_byte_1 = "" @@ -251,10 +248,13 @@ bin_byte_2 = $2 end when 'UTF-32LE', 'UTF-32BE' bin_byte_1 = "" bin_byte_2 = bin_byte_complete + when /^(ISO-8859-|Windows-125)/ + bin_byte_1 = "" + bin_byte_2 = bin_byte_complete end end bin_buffer[-1] << Paint[ bin_byte_1, current_color @@ -266,27 +266,36 @@ bin_buffer[-1] << " " } } - if string.encoding.name[0, 3] == "UTF" + if type == :unicode enc_buffer.zip(cp_buffer, hex_buffer, bin_buffer, separator).flatten.join("\n") else enc_buffer.zip(hex_buffer, bin_buffer, separator).flatten.join("\n") end end - def self.random_color - "%.2x%.2x%.2x" %[rand(90) + 60, rand(90) + 60, rand(90) + 60] - end - - def self.symbolify(char) - return char.inspect unless char.encoding.name[0, 3] == "UTF" - Symbolify.symbolify(char).encode('UTF-8') - end - def self.determine_terminal_cols STDIN.winsize[1] || DEFAULT_TERMINAL_WIDTH rescue Errno::ENOTTY return DEFAULT_TERMINAL_WIDTH + end + + def self.determine_char_color(char_info) + if !char_info.valid? + "#FF0000" + elsif !char_info.assigned? + "#FF5500" + elsif char_info.control? + "#0000FF" + elsif char_info.blank? + "#33AADD" + else + random_color + end + end + + def self.random_color + "%.2x%.2x%.2x" % [rand(90) + 60, rand(90) + 60, rand(90) + 60] end end