lib/unibits.rb in unibits-2.1.0 vs lib/unibits.rb in unibits-2.1.1
- old
+ new
@@ -77,10 +77,11 @@
current_encoding_error = nil
puts
string.each_char{ |char|
char_info = Characteristics.create_for_type(char, type)
+ double_check_utf32_validness!(char, char_info)
current_color = determine_char_color(char_info)
current_encoding_error = nil if char_info.valid?
char.each_byte.with_index{ |byte, byteindex|
@@ -182,14 +183,17 @@
codepoint = "hlf.srg."
else
codepoint = "invalid"
end
when 'UTF-32LE', 'UTF-32BE'
- if char.bytesize != "4"
+ if char.bytesize % 4 != 0
codepoint = "incompl."
+ elsif char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 2 : 1] > 16 ||
+ char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 3 : 0] > 0
+ codepoint = "toolarge"
else
- codepoint = "invalid"
+ codepoint = "sur.gate"
end
end
end
cp_buffer[-1] << Paint[ codepoint.ljust(10), current_color, :bold ]
@@ -302,7 +306,18 @@
res = ""
res << Paint[ bin_byte_1, current_color ] unless !bin_byte_1 || bin_byte_1.empty?
res << Paint[ bin_byte_2, current_color, :underline ] unless !bin_byte_2 || bin_byte_2.empty?
res
+ end
+
+ def self.double_check_utf32_validness!(char, char_info)
+ return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
+ byte_values = char.b.unpack("C*")
+ le = char_info.encoding.name == 'UTF-32LE'
+ if byte_values[le ? 2 : 1] > 16 ||
+ byte_values[le ? 3 : 0] > 0 ||
+ byte_values[le ? 1 : 2] >= 216 && byte_values[le ? 1 : 2] <= 223
+ char_info.instance_variable_set(:@is_valid, false)
+ end
end
end