page.rb in spidr-0.1.4

- old
+ new

@@ -1,7 +1,7 @@
 require 'uri'
-require 'hpricot'
+require 'nokogiri'
 
 module Spidr
   class Page
 
     # URL of the page
@@ -183,27 +183,32 @@
     def body
       # Delegates straight to the body of the response held in @response.
       @response.body
     end
 
     #
-    # Returns an Hpricot::Doc if the page represents a HTML document,
-    # returns +nil+ otherwise.
+    # If the page has a <tt>text/html</tt> content-type, a
+    # Nokogiri::HTML::Document object will be returned. If the page has a
+    # <tt>text/xml</tt> content-type, a Nokogiri::XML::Document object
+    # will be returned. Other content-types will cause +nil+ to be
+    # returned.
     #
     def doc
       # The parsed document is cached in @doc, so a later call reuses it
       # instead of parsing the body again. When no branch below matches,
       # the +if+ expression evaluates to nil and that is what is returned.
       if html?
-        return @doc ||= Hpricot(body)
+        return @doc ||= Nokogiri::HTML(body)
+      elsif xml?
+        return @doc ||= Nokogiri::XML(body)
       end
     end
 
     #
     # Returns all links from the HTML page.
     #
     def links
       urls = []
 
       if html?
-        doc.search('a[@href]') do |a|
-          url = a.attributes['href'].strip
+        self.doc.search('a[@href]').each do |a|
+          url = a.get_attribute('href')
 
           urls << url unless url.empty?
         end
       end