string_spec.rb in utf8-0.1.3

- old
+ new

@@ -5,10 +5,11 @@
   before(:each) do
     @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
     @str = @char_array.join
     @utf8 = @str.as_utf8
     @utf8_len = @char_array.size
+    @codepoints = @char_array.map{|c| c.unpack('U').first} # expected codepoints: each fixture character decoded with unpack('U')
   end
 
   it "should blow up on invalid utf8 chars" do
     # lets cut right into the middle of a sequence so we know it's bad
     @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
@@ -65,22 +66,39 @@
           Enumerable::Enumerator
         end
       end
 
       @utf8.chars.class.should eql(klass)
-      i=0
       @utf8.chars do |char|
         char.should_not be_nil
-        i+=1
       end
       joined = @utf8.chars.to_a.join
       @utf8.should eql(joined)
       @utf8.chars.to_a.size.should eql(@utf8_len)
       @utf8.chars.to_a.should eql(@char_array)
     end
   end
 
+  context "#codepoints and #each_codepoint" do
+    it "should be utf8-aware" do
+      klass = begin # choose the enumerator class this Ruby is expected to return
+        if defined? Encoding # Encoding constant exists (presumably Ruby 1.9+): expect top-level Enumerator
+          Enumerator
+        else # no Encoding constant: expect the namespaced Enumerable::Enumerator
+          Enumerable::Enumerator
+        end
+      end
+
+      @utf8.codepoints.class.should eql(klass) # called without a block, #codepoints must return an enumerator
+      @utf8.codepoints do |codepoint| # block form: every yielded value must be non-nil
+        codepoint.should_not be_nil
+      end
+      @utf8.codepoints.to_a.size.should eql(@codepoints.size) # as many codepoints as the @codepoints baseline holds
+      @utf8.codepoints.to_a.should eql(@codepoints) # same values in the same order as the baseline
+    end # NOTE(review): #each_codepoint is named in the context title but is never called in this example
+  end
+
   context "[offset] syntax" do
     it "should be utf8-aware" do
       @char_array.each_with_index do |char, i|
         utf8_char = @utf8[i]
         utf8_char.should eql(char)
@@ -152,9 +170,29 @@
 
     it "should return nil for an out of range offset or length" do
       @utf8[100..100].should be_nil
       @utf8[-100..100].should be_nil
       @utf8[0..-100].should eql("")
+    end
+  end
+
+  context "#valid?" do
+    it "should test validity" do
+      # let's cut right into the middle of a sequence so we know it's bad
+      @str.force_encoding('binary') if @str.respond_to?(:force_encoding) # index by byte where encodings exist; guarded for Rubies without force_encoding
+      utf8 = @str[0..1] # keep only the first two bytes (presumably an incomplete multi-byte character)
+      utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding) # re-tag the truncated bytes as UTF-8
+      utf8 = utf8.as_utf8
+
+      utf8.valid?.should be_false # the truncated sequence must be reported invalid
+      @utf8.valid?.should be_true # the intact fixture string must be reported valid
+    end
+
+    it "should test validity using a maximum codepoint" do
+      highest_codepoint = @utf8.codepoints.to_a.max # largest codepoint present in the fixture string
+
+      @utf8.valid?(highest_codepoint).should be_true # a limit equal to the largest codepoint is accepted
+      @utf8.valid?(highest_codepoint-1).should be_false # a limit one below the largest codepoint is rejected
     end
   end
 
   it "[Regexp] syntax shouldn't be supported yet" do
     lambda {
\ No newline at end of file