page.rb in spidr-0.2.4

- old
+ new

@@ -59,17 +59,23 @@
     end
 
     alias ok? is_ok?
 
     #
-    # Determines if the response code is `301` or `307`.
+    # Determines if the response code is `300`, `301`, `302`, `303`
+    # or `307`.
     #
     # @return [Boolean]
-    #   Specifies whether the response code is `301` or `307`.
+    #   Specifies whether the response code is an HTTP Redirect code.
     #
     def is_redirect?
-      (code == 301 || code == 307)
+      case code
+      when 300..303, 307  # widened: the replaced check accepted only 301 and 307
+        true
+      else
+        false
+      end
     end
 
     alias redirect? is_redirect?
 
     #
@@ -143,11 +149,11 @@
     #
     # @return [String]
     #   The Content-Type of the page.
     #
     def content_type
-      @response['Content-Type']
+      (@response['Content-Type'] || '')  # substitute '' when the header lookup yields nil or false
     end
 
     #
     # The content types of the page.
     #
@@ -155,21 +161,21 @@
     #   The values within the Content-Type header.
     #
     # @since 0.2.2
     #
     def content_types
-      @headers['content-type']
+      (@headers['content-type'] || [])  # substitute an empty Array when the lookup yields nil or false, so callers can iterate unconditionally
     end
 
     #
     # Determines if the page is plain-text.
     #
     # @return [Boolean]
     #   Specifies whether the page is plain-text.
     #
     def plain_text?
-      content_types.include?('text/plain')
+      is_content_type?('text/plain')  # now routed through is_content_type? instead of calling content_types.include? directly
     end
 
     alias txt? plain_text?
 
     #
@@ -177,103 +183,103 @@
     #
     # @return [Boolean]
     #   Specifies whether the page is HTML document.
     #
     def html?
-      content_types.include?('text/html')
+      is_content_type?('text/html')  # is_content_type? tests each content_types entry with include?; presumably lets values with parameters (e.g. a charset) match -- confirm
     end
 
     #
     # Determines if the page is XML document.
     #
     # @return [Boolean]
     #   Specifies whether the page is XML document.
     #
     def xml?
-      content_types.include?('text/xml')
+      is_content_type?('text/xml')  # only 'text/xml' is tested here; no other XML MIME type is checked by this method
     end
 
     #
     # Determines if the page is XML Stylesheet (XSL).
     #
     # @return [Boolean]
     #   Specifies whether the page is XML Stylesheet (XSL).
     #
     def xsl?
-      content_types.include?('text/xsl')
+      is_content_type?('text/xsl')  # delegates the 'text/xsl' test to is_content_type?
     end
 
     #
     # Determines if the page is JavaScript.
     #
     # @return [Boolean]
     #   Specifies whether the page is JavaScript.
     #
     def javascript?
-      content_types.include?('text/javascript') || \
-        content_types.include?('application/javascript')
+      is_content_type?('text/javascript') || \
+        is_content_type?('application/javascript')  # either of the two MIME types qualifies the page as JavaScript
     end
 
     #
     # Determines if the page is a CSS stylesheet.
     #
     # @return [Boolean]
     #   Specifies whether the page is a CSS stylesheet.
     #
     def css?
-      content_types.include?('text/css')
+      is_content_type?('text/css')  # delegates the 'text/css' test to is_content_type?
     end
 
     #
     # Determines if the page is a RSS feed.
     #
     # @return [Boolean]
     #   Specifies whether the page is a RSS feed.
     #
     def rss?
-      content_types.include?('application/rss+xml') || \
-        content_types.include?('application/rdf+xml')
+      is_content_type?('application/rss+xml') || \
+        is_content_type?('application/rdf+xml')  # 'application/rdf+xml' is also accepted as an RSS feed type
     end
 
     #
     # Determines if the page is an Atom feed.
     #
     # @return [Boolean]
     #   Specifies whether the page is an Atom feed.
     #
     def atom?
-      content_types.include?('application/atom+xml')
+      is_content_type?('application/atom+xml')  # delegates the 'application/atom+xml' test to is_content_type?
     end
 
     #
     # Determines if the page is a MS Word document.
     #
     # @return [Boolean]
     #   Specifies whether the page is a MS Word document.
     #
     def ms_word?
-      content_types.include?('application/msword')
+      is_content_type?('application/msword')  # only 'application/msword' is tested by this method
     end
 
     #
     # Determines if the page is a PDF document.
     #
     # @return [Boolean]
     #   Specifies whether the page is a PDF document.
     #
     def pdf?
-      content_types.include?('application/pdf')
+      is_content_type?('application/pdf')  # delegates the 'application/pdf' test to is_content_type?
     end
 
     #
     # Determines if the page is a ZIP archive.
     #
     # @return [Boolean]
     #   Specifies whether the page is a ZIP archive.
     #
     def zip?
-      content_types.include?('application/zip')
+      is_content_type?('application/zip')  # delegates the 'application/zip' test to is_content_type?
     end
 
     #
     # The raw Cookie String sent along with the page.
     #
@@ -327,11 +333,11 @@
     #
     # @return [String]
     #   The body of the response.
     #
     def body
-      @response.body
+      (@response.body || '')  # a nil or false response body is reported as '' rather than nil
     end
 
     #
     # Returns a parsed document object for HTML, XML, RSS and Atom pages.
     #
@@ -342,11 +348,11 @@
     #
     # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html
     # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
     #
     def doc
-      return nil if (body.nil? || body.empty?)
+      return nil if body.empty?
 
       begin
         if html?
           return @doc ||= Nokogiri::HTML(body)
         elsif (xml? || xsl? || rss? || atom?)
@@ -373,14 +379,14 @@
     #
     # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000239
     #
     def search(*paths)
       if doc
-        return doc.search(*paths)
+        doc.search(*paths)
+      else
+        []  # no document available: yield an empty Array, the same value the removed trailing return produced
       end
-
-      return []
     end
 
     #
     # Searches for the first occurrence an XPath or CSS Path expression.
     #
@@ -393,14 +399,12 @@
     #
     # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251
     #
     def at(*arguments)
       if doc
-        return doc.at(*arguments)
+        doc.at(*arguments)  # when doc is falsy the else-less if evaluates to nil, matching the removed explicit return nil
       end
-
-      return nil
     end
 
     alias / search
     alias % at
 
@@ -410,11 +414,11 @@
     # @return [String]
     #   The inner-text of the title element of the page.
     #
     def title
       if (node = at('//title'))
-        return node.inner_text
+        node.inner_text  # implicit return value; the method evaluates to nil when at('//title') finds nothing
       end
     end
 
     #
     # The links from within the page.
@@ -428,12 +432,11 @@
 
       add_url = lambda { |url|
         urls << url unless (url.nil? || url.empty?)
       }
 
-      case code
-      when 300..303, 307
+      if self.is_redirect?
         location = @headers['location']
 
         if location.kind_of?(Array)
           # handle multiple location URLs
           location.each(&add_url)
@@ -505,10 +508,26 @@
     end
 
     protected
 
     #
+    # Determines if any of the content-types of the page include a given
+    # type.
+    #
+    # @param [String] type
+    #   The content-type to test for.
+    #
+    # @return [Boolean]
+    #   Specifies whether the page includes the given content-type.
+    #
+    # @since 0.2.4
+    #
+    def is_content_type?(type)
+      content_types.any? { |content| content.include?(type) }  # true when at least one entry include?s type; entries are presumably Strings, making this a substring test -- confirm
+    end
+
+    #
     # Provides transparent access to the values in `headers`.
     #
     def method_missing(sym,*args,&block)
       if (args.empty? && block.nil?)
         name = sym.id2name.sub('_','-')
@@ -516,8 +535,8 @@
         return @response[name] if @response.key?(name)
       end
 
       return super(sym,*args,&block)
     end
-
+  
   end
 end