spec/unibits_spec.rb in unibits-1.0.0 vs spec/unibits_spec.rb in unibits-1.1.0

- old
+ new

@@ -64,7 +64,173 @@ result = Paint.unpaint(Unibits.visualize("ASCII string".force_encoding('ASCII'))) result.must_match "C" result.must_match "43" result.must_match "01000011" end + + describe "invalid UTF-8 encodings" do + it "- unexpected continuation byte (1/2)" do + string = "abc\x80efg" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "unexp.c." + result.must_match /e.*f.*g/m + end + + it "- unexpected continuation byte (2/2)" do + string = "🌫\x81efg" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "unexp.c." + result.must_match /e.*f.*g/m + end + + it "- not enough continuation bytes" do + string = "\xF0\x9F\x8CABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "n.e.con." + result.must_match /A.*B.*C/m + end + + it "- overlong padding (1/2)" do + string = "\xE0\x81\x81ABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match /overlong.*overlong.*overlong/m + result.must_match /A.*B.*C/m + end + + it "- overlong padding (2/2)" do + string = "\xC0\x80no double null" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match /overlong.*overlong/m + end + + it "- too large codepoint (1/2)" do + string = "\xF5\x8F\xBF\xBFABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match /toolarge.*toolarge.*toolarge.*toolarge/m + result.must_match /A.*B.*C/m + end + + it "- too large codepoint (2/2)" do + string = "\xF4\xAF\xBF\xBFABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match /toolarge.*toolarge.*toolarge.*toolarge/m + result.must_match /A.*B.*C/m + end + + it "- too large byte" do + string = "\xFF" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "toolarge" + end + + it "- has surrogate (1/2)" do + string = "\xED\xA0\x80ABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "sur.gate" + result.must_match /A.*B.*C/m + end + + it "- has surrogate (2/2)" do + string = "\xED\xBF\xBFABC" + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match "sur.gate" + result.must_match /A.*B.*C/m + end + end + + describe "invalid UTF-16 encodings" do + it "- incomplete number of bytes (1/2)" do + string = "a".b.force_encoding("UTF-16LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "incompl." + result.must_match "�" + end + + it "- incomplete number of bytes (2/2)" do + string = "🌫".b[0..-2].force_encoding("UTF-16LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "incompl." + result.must_match "�" + end + + it "- only lower half surrogate" do + string = "\x3C\xD8\x2Ba".force_encoding("UTF-16LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "hlf.srg." + result.must_match "�" + end + + it "- only higher half surrogate" do + string = "\x3Ca\x2B\xDF".force_encoding("UTF-16LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "hlf.srg." + result.must_match "�" + end + end + + describe "invalid UTF-32 encodings" do + # please note, currently, too large codepoints and encoded utf16 surrogates are treated as valid encodings + + it "- incomplete number of bytes (1/3)" do + string = "a".b.force_encoding("UTF-32LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "incompl." + result.must_match "�" + end + + it "- incomplete number of bytes (2/3)" do + string = "🌫".b[0..-2].force_encoding("UTF-32LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "incompl." + result.must_match "�" + end + + it "- incomplete number of bytes (3/3)" do + string = "🌫".b[0..-2].force_encoding("UTF-32LE") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "incompl." + result.must_match "�" + end + end + + describe "invalid ASCII encodings" do + it "- contains bytes with 8th bit set" do + string = "abc\x80efg".force_encoding("ASCII") + result = Paint.unpaint(Unibits.visualize(string)) + result.must_match "�" + result.must_match /e.*f.*g/m + end + end + end + + describe "wide_ambiguous: option" do + it "- default is 1" do + string = "⚀······" + result = Unibits.stats(string) + result.wont_match "13" + end + + it "- default is 2" do + string = "⚀······" + result = Unibits.stats(string, wide_ambiguous: true) + result.must_match "13" + end + end + + describe "width: option" do + it "sets a custom column width" do + string = "bla" * 99 + result = Paint.unpaint(Unibits.visualize(string, width: 50)) + (result[/^.*$/].size <= 50).must_equal true + end end end