spec/string_spec.rb in utf8-0.1.2 vs spec/string_spec.rb in utf8-0.1.3
- old
+ new
@@ -5,10 +5,11 @@
before(:each) do # rebuilds the shared fixtures before every example
@char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"] # ten one-character strings
@str = @char_array.join # the characters concatenated into a single plain String
@utf8 = @str.as_utf8 # object under test; as_utf8 is defined outside this view (presumably the gem's UTF-8-aware wrapper)
@utf8_len = @char_array.size # expected length counted in characters
+ @codepoints = @char_array.map{|c| c.unpack('U').first} # expected codepoints: unpack('U') decodes a character to its Unicode codepoint
end
it "should blow up on invalid utf8 chars" do
# let's cut right into the middle of a sequence so we know it's bad
@str.force_encoding('binary') if @str.respond_to?(:force_encoding)
@@ -65,22 +66,39 @@
Enumerable::Enumerator
end
end
@utf8.chars.class.should eql(klass)
- i=0
@utf8.chars do |char|
char.should_not be_nil
- i+=1
end
joined = @utf8.chars.to_a.join
@utf8.should eql(joined)
@utf8.chars.to_a.size.should eql(@utf8_len)
@utf8.chars.to_a.should eql(@char_array)
end
end
+ context "#codepoints and #each_codepoint" do # NOTE(review): only #codepoints is called below; #each_codepoint is never exercised
+ it "should be utf8-aware" do
+ klass = begin # enumerator class expected from a block-less #codepoints call
+ if defined? Encoding # Encoding exists on Ruby 1.9+, where the class is the top-level Enumerator
+ Enumerator
+ else # older Rubies expose it as Enumerable::Enumerator
+ Enumerable::Enumerator
+ end
+ end
+
+ @utf8.codepoints.class.should eql(klass) # without a block, an enumerator of the expected class comes back
+ @utf8.codepoints do |codepoint| # block form: the block asserts each yielded value is non-nil
+ codepoint.should_not be_nil
+ end
+ @utf8.codepoints.to_a.size.should eql(@codepoints.size) # one codepoint per fixture character
+ @utf8.codepoints.to_a.should eql(@codepoints) # same values, in the same order, as the expected list
+ end
+ end
+
context "[offset] syntax" do
it "should be utf8-aware" do
@char_array.each_with_index do |char, i|
utf8_char = @utf8[i]
utf8_char.should eql(char)
@@ -152,9 +170,29 @@
it "should return nil for an out of range offset or length" do
@utf8[100..100].should be_nil # range whose start is 100
@utf8[-100..100].should be_nil # range whose start is -100
@utf8[0..-100].should eql("") # NOTE(review): expects an empty string here, not nil, despite the example title
+ end
+ end
+
+ context "#valid?" do # specs for the validity check, with and without a codepoint limit
+ it "should test validity" do
+ # let's cut right into the middle of a sequence so we know it's bad
+ @str.force_encoding('binary') if @str.respond_to?(:force_encoding) # index by bytes; the guard skips this where String#force_encoding is unavailable
+ utf8 = @str[0..1] # first two bytes only, i.e. the incomplete sequence described above
+ utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding) # relabel the fragment as UTF-8 (same guard as above)
+ utf8 = utf8.as_utf8 # wrap the broken fragment the same way the fixture is wrapped
+
+ utf8.valid?.should be_false # the truncated fragment must be reported invalid
+ @utf8.valid?.should be_true # the intact fixture must be reported valid
+ end
+
+ it "should test validity using a maximum codepoint" do
+ highest_codepoint = @utf8.codepoints.to_a.max # largest codepoint present in the fixture
+
+ @utf8.valid?(highest_codepoint).should be_true # limit equal to the largest codepoint: still valid
+ @utf8.valid?(highest_codepoint-1).should be_false # limit one below the largest codepoint: invalid
end
end
it "[Regexp] syntax shouldn't be supported yet" do
lambda {
\ No newline at end of file