spec/string_spec.rb in utf8-0.1.6 vs spec/string_spec.rb in utf8-0.1.7
- old
+ new
@@ -1,221 +1,219 @@
# encoding: utf-8
require File.expand_path('../spec_helper', __FILE__)
describe String::UTF8 do
# Shared fixture rebuilt before every example: ten Chinese characters, the
# joined plain String, the result of as_utf8 on it, the expected character
# count, and the expected codepoint of each character.
- before(:each) do
+ before :each do
@char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
@str = @char_array.join
@utf8 = @str.as_utf8
@utf8_len = @char_array.size
# unpack('U') decodes the leading UTF-8 character of each string into its codepoint
@codepoints = @char_array.map{|c| c.unpack('U').first}
end
# A truncated multi-byte sequence must raise ArgumentError from length,
# [offset, length] slicing and character enumeration.
- it "should blow up on invalid utf8 chars" do
+ test "should blow up on invalid utf8 chars" do
# let's cut right into the middle of a sequence so we know it's bad
# force_encoding is only called when the String responds to it
@str.force_encoding('binary') if @str.respond_to?(:force_encoding)
utf8 = @str[0..1]
utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
utf8 = utf8.as_utf8
- lambda {
+ assert_raise ArgumentError do
utf8.length
- }.should raise_error(ArgumentError)
+ end
- lambda {
+ assert_raise ArgumentError do
utf8[0, 10]
- }.should raise_error(ArgumentError)
+ end
- lambda {
+ assert_raise ArgumentError do
utf8.chars.to_a
- }.should raise_error(ArgumentError)
+ end
end
# A plain String must respond to as_utf8, and the returned object's class
# must be String::UTF8.
- it "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
- "".should respond_to(:as_utf8)
- "".as_utf8.class.should eql(String::UTF8)
+ test "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
+ assert "".respond_to?(:as_utf8)
+ assert_equal String::UTF8, "".as_utf8.class
end
# as_raw must return an object whose class is String.
- it "should allow access to the underlying raw string" do
+ test "should allow access to the underlying raw string" do
raw = @utf8.as_raw
- raw.class.should eql(String)
+ assert_equal String, raw.class
# The expected raw length depends on whether the Encoding constant exists:
# the character count when it does, @str.size when it does not.
if defined? Encoding
- raw.length.should eql(@utf8_len)
+ assert_equal @utf8_len, raw.length
else
- raw.length.should eql(@str.size)
+ assert_equal @str.size, raw.length
end
end
# Strings produced by indexing and by chars must themselves be String::UTF8.
- it "should wrap all returned strings to be utf8-aware" do
- @utf8[0].class.should eql(String::UTF8)
- @utf8.chars.to_a[0].class.should eql(String::UTF8)
+ test "should wrap all returned strings to be utf8-aware" do
+ assert_equal String::UTF8, @utf8[0].class
+ assert_equal String::UTF8, @utf8.chars.to_a[0].class
end
# clean is expected to substitute '?' for bytes that are not valid UTF-8.
# The inputs embed such bytes with octal escapes; in every expected value
# each offending byte becomes exactly one '?'.
- it "clean should replace invalid utf8 chars with '?'" do
+ test "clean should replace invalid utf8 chars with '?'" do
orig = "provided by Cristian Rodr\355guez."
clean = "provided by Cristian Rodr?guez."
- orig.as_utf8.clean.should eql(clean)
+ assert_equal clean, orig.as_utf8.clean
+ assert_equal "asdf24??asdf24", "asdf24\206\222asdf24".as_utf8.clean
+ assert_equal "asdf24?asdf24", "asdf24\342asdf24".as_utf8.clean
+ assert_equal "asdf24??asdf24", "asdf24\342\206asdf24".as_utf8.clean
+ assert_equal "asdf24?asdf24", "asdf24\222asdf24".as_utf8.clean
end
# clean must leave a valid sequence untouched. The new assertion spells the
# three bytes of '→' (\342\206\222) as octal escapes between ASCII text.
- it "clean should not replace valid utf8 chars with '?'" do
- '→'.as_utf8.clean.should eql('→')
+ test "clean should not replace valid utf8 chars with '?'" do
+ assert_equal "asdf24\342\206\222asdf24", "asdf24\342\206\222asdf24".as_utf8.clean
end
# length and size must both equal the number of characters in the fixture
# (@utf8_len).
context "#length and #size" do
- it "should be utf8-aware" do
- @utf8.length.should eql(@utf8_len)
- @utf8.size.should eql(@utf8_len)
+ test "should be utf8-aware" do
+ assert_equal @utf8_len, @utf8.length
+ assert_equal @utf8_len, @utf8.size
end
end
# chars without a block must return an enumerator; with a block it must yield
# non-nil values; collected, it must reproduce the fixture characters.
context "#chars and #each_char" do
- it "should be utf8-aware" do
+ test "should be utf8-aware" do
# expected enumerator class: Enumerator when Encoding is defined,
# Enumerable::Enumerator otherwise
klass = begin
if defined? Encoding
Enumerator
else
Enumerable::Enumerator
end
end
- @utf8.chars.class.should eql(klass)
+ assert_equal klass, @utf8.chars.class
@utf8.chars do |char|
- char.should_not be_nil
+ assert !char.nil?
end
# joining the enumerated characters should give back the original string
joined = @utf8.chars.to_a.join
- @utf8.should eql(joined)
- @utf8.chars.to_a.size.should eql(@utf8_len)
- @utf8.chars.to_a.should eql(@char_array)
+ assert_equal joined, @utf8
+ assert_equal @utf8_len, @utf8.chars.to_a.size
+ assert_equal @char_array, @utf8.chars.to_a
end
end
# codepoints without a block must return an enumerator; with a block it must
# yield non-nil values; collected, it must equal the fixture's @codepoints.
context "#codepoints and #each_codepoint" do
- it "should be utf8-aware" do
+ test "should be utf8-aware" do
# expected enumerator class: Enumerator when Encoding is defined,
# Enumerable::Enumerator otherwise
klass = begin
if defined? Encoding
Enumerator
else
Enumerable::Enumerator
end
end
- @utf8.codepoints.class.should eql(klass)
+ assert_equal klass, @utf8.codepoints.class
@utf8.codepoints do |codepoint|
- codepoint.should_not be_nil
+ assert !codepoint.nil?
end
- @utf8.codepoints.to_a.size.should eql(@codepoints.size)
- @utf8.codepoints.to_a.should eql(@codepoints)
+ assert_equal @codepoints.size, @utf8.codepoints.to_a.size
+ assert_equal @codepoints, @utf8.codepoints.to_a
end
end
# Single-index access is checked against @char_array: every valid index,
# a negative index, and out-of-range indices (which must give nil).
context "[offset] syntax" do
- it "should be utf8-aware" do
+ test "should be utf8-aware" do
# character i of the wrapped string must equal element i of the fixture array
@char_array.each_with_index do |char, i|
utf8_char = @utf8[i]
- utf8_char.should eql(char)
+ assert_equal char, utf8_char
end
end
- it "should support negative indices" do
+ test "should support negative indices" do
utf8_char = @utf8[-5]
- utf8_char.should eql(@char_array[-5])
+ assert_equal @char_array[-5], utf8_char
end
- it "should return nil for out of range indices" do
- @utf8[100].should be_nil
- @utf8[-100].should be_nil
+ test "should return nil for out of range indices" do
+ assert @utf8[100].nil?
+ assert @utf8[-100].nil?
end
end
# [offset, length] slices of the wrapped string are compared with the same
# slice of @char_array joined into a String.
context "[offset, length] syntax" do
- it "should be utf8-aware" do
- utf8_char = @utf8[1, 4]
- utf8_char.should eql(@char_array[1, 4].join)
+ test "should be utf8-aware" do
+ assert_equal @char_array[1, 4].join, @utf8[1, 4]
+ assert_equal @char_array[0, 6].join, @utf8[0, 6]
- utf8_char = @utf8[0, 6]
- utf8_char.should eql(@char_array[0, 6].join)
-
# this will fail due to a bug in 1.9
# (so the over-long length case only runs when Encoding is not defined)
unless defined? Encoding
- utf8_char = @utf8[6, 100]
- utf8_char.should eql(@char_array[6, 100].join)
+ assert_equal @char_array[6, 100].join, @utf8[6, 100]
end
- utf8_char = @utf8[-1, 2]
- utf8_char.should eql(@char_array[-1, 2].join)
-
- utf8_char = @utf8[-1, 100]
- utf8_char.should eql(@char_array[-1, 100].join)
-
- utf8_char = @utf8[0, 0]
- utf8_char.should eql(@char_array[0, 0].join)
+ assert_equal @char_array[-1, 2].join, @utf8[-1, 2]
+ assert_equal @char_array[-1, 100].join, @utf8[-1, 100]
+ assert_equal @char_array[0, 0].join, @utf8[0, 0]
end
# an offset beyond either end, or a negative length, must give nil
- it "should return nil for an out of range offset or length" do
- @utf8[100, 100].should be_nil
- @utf8[-100, 100].should be_nil
- @utf8[0, -100].should be_nil
+ test "should return nil for an out of range offset or length" do
+ assert @utf8[100, 100].nil?
+ assert @utf8[-100, 100].nil?
+ assert @utf8[0, -100].nil?
end
end
# Range slices of the wrapped string are compared with the same range of
# @char_array joined into a String.
context "[Range] syntax" do
- it "should be utf8-aware" do
- utf8_char = @utf8[1..4]
- utf8_char.should eql(@char_array[1..4].join)
+ test "should be utf8-aware" do
+ assert_equal @char_array[1..4].join, @utf8[1..4]
+ assert_equal @char_array[0..6].join, @utf8[0..6]
- utf8_char = @utf8[0..6]
- utf8_char.should eql(@char_array[0..6].join)
-
# this will fail due to a bug in 1.9
# (so the over-long range case only runs when Encoding is not defined)
unless defined? Encoding
- utf8_char = @utf8[6..100]
- utf8_char.should eql(@char_array[6..100].join)
+ assert_equal @char_array[6..100].join, @utf8[6..100]
end
- utf8_char = @utf8[-1..2]
- utf8_char.should eql(@char_array[-1..2].join)
-
- utf8_char = @utf8[-1..100]
- utf8_char.should eql(@char_array[-1..100].join)
+ assert_equal @char_array[-1..2].join, @utf8[-1..2]
+ assert_equal @char_array[-1..100].join, @utf8[-1..100]
end
# NOTE: despite the description, only a range whose start is out of bounds is
# expected to give nil; 0..-100 is expected to give an empty string.
- it "should return nil for an out of range offset or length" do
- @utf8[100..100].should be_nil
- @utf8[-100..100].should be_nil
- @utf8[0..-100].should eql("")
+ test "should return nil for an out of range offset or length" do
+ assert @utf8[100..100].nil?
+ assert @utf8[-100..100].nil?
+ assert_equal "", @utf8[0..-100]
end
end
# valid? is exercised with no argument and with a maximum-codepoint argument.
context "#valid?" do
- it "should test validity" do
+ test "should test validity" do
# let's cut right into the middle of a sequence so we know it's bad
# force_encoding is only called when the String responds to it
@str.force_encoding('binary') if @str.respond_to?(:force_encoding)
utf8 = @str[0..1]
utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
utf8 = utf8.as_utf8
- utf8.valid?.should be_false
- @utf8.valid?.should be_true
+ assert !utf8.valid?
+ assert @utf8.valid?
- "provided by Cristian Rodr\355guez.".as_utf8.should_not be_valid
+ assert !"provided by Cristian Rodr\355guez.".as_utf8.valid?
end
- it "should test validity using a maximum codepoint" do
+ test "should test validity using a maximum codepoint" do
# the fixture must pass when the limit equals its largest codepoint and
# fail when the limit is one lower
highest_codepoint = @utf8.codepoints.to_a.max
- @utf8.valid?(highest_codepoint).should be_true
- @utf8.valid?(highest_codepoint-1).should be_false
+ assert @utf8.valid?(highest_codepoint)
+ assert !@utf8.valid?(highest_codepoint-1)
end
end
# Indexing the wrapped string with a Regexp is expected to raise ArgumentError.
- it "[Regexp] syntax shouldn't be supported yet" do
- lambda {
+ test "[Regexp] syntax shouldn't be supported yet" do
+ assert_raise ArgumentError do
@utf8[/a/]
- }.should raise_error(ArgumentError)
+ end
end
# Indexing the wrapped string with a Regexp plus a capture index is expected
# to raise ArgumentError.
- it "[Regexp, match_index] syntax shouldn't be supported yet" do
- lambda {
+ test "[Regexp, match_index] syntax shouldn't be supported yet" do
+ assert_raise ArgumentError do
@utf8[/(a)/, 1]
- }.should raise_error(ArgumentError)
+ end
+ end
+
+ context "#ascii_only" do
+ test "should return true for a string within the low ascii range" do
+ assert "asdf".as_utf8.ascii_only?
+ end
+
+ test "should return false for a string within the low ascii range" do
+ assert !@char_array.first.as_utf8.ascii_only?
+ end
end
end
\ No newline at end of file