page.rb in spidr-0.2.2

- old
+ new

@@ -1,13 +1,17 @@
 require 'spidr/extensions/uri'
 
+require 'set'
 require 'uri'
 require 'nokogiri'
 
 module Spidr
   class Page
 
+    # Reserved names used within Cookie strings
+    RESERVED_COOKIE_NAMES = Set['path', 'expires', 'domain']
+
     # URL of the page
     attr_reader :url
 
     # HTTP Response
     attr_reader :response
@@ -140,17 +144,29 @@
     def content_type
       @response['Content-Type']
     end
 
     #
+    # The content types of the page.
+    #
+    # @return [Array<String>]
+    #   The values within the Content-Type header.
+    #
+    # @since 0.2.2
+    #
+    def content_types
+      @headers['content-type']
+    end
+
+    #
     # Determines if the page is plain-text.
     #
     # @return [Boolean]
     #   Specifies whether the page is plain-text.
     #
     def plain_text?
-      (content_type =~ /text\/plain/) == 0
+      content_types.include?('text/plain')
     end
 
     alias txt? plain_text?
 
     #
@@ -158,100 +174,148 @@
     #
     # @return [Boolean]
     #   Specifies whether the page is an HTML document.
     #
     def html?
-      (content_type =~ /text\/html/) == 0
+      content_types.include?('text/html')
     end
 
     #
     # Determines if the page is an XML document.
     #
     # @return [Boolean]
     #   Specifies whether the page is an XML document.
     #
     def xml?
-      (content_type =~ /text\/xml/) == 0
+      content_types.include?('text/xml')
     end
 
     #
     # Determines if the page is an XML Stylesheet (XSL).
     #
     # @return [Boolean]
     #   Specifies whether the page is an XML Stylesheet (XSL).
     #
     def xsl?
-      (content_type =~ /text\/xsl/) == 0
+      content_types.include?('text/xsl')
     end
 
     #
     # Determines if the page is JavaScript.
     #
     # @return [Boolean]
     #   Specifies whether the page is JavaScript.
     #
     def javascript?
-      (content_type =~ /(text|application)\/javascript/) == 0
+      content_types.include?('text/javascript') || \
+        content_types.include?('application/javascript')
     end
 
     #
     # Determines if the page is a CSS stylesheet.
     #
     # @return [Boolean]
     #   Specifies whether the page is a CSS stylesheet.
     #
     def css?
-      (content_type =~ /text\/css/) == 0
+      content_types.include?('text/css')
     end
 
     #
     # Determines if the page is an RSS feed.
     #
     # @return [Boolean]
     #   Specifies whether the page is an RSS feed.
     #
     def rss?
-      (content_type =~ /application\/(rss|rdf)\+xml/) == 0
+      content_types.include?('application/rss+xml') || \
+        content_types.include?('application/rdf+xml')
     end
 
     #
     # Determines if the page is an Atom feed.
     #
     # @return [Boolean]
     #   Specifies whether the page is an Atom feed.
     #
     def atom?
-      (content_type =~ /application\/atom\+xml/) == 0
+      content_types.include?('application/atom+xml')
     end
 
     #
     # Determines if the page is a MS Word document.
     #
     # @return [Boolean]
     #   Specifies whether the page is a MS Word document.
     #
     def ms_word?
-      (content_type =~ /application\/msword/) == 0
+      content_types.include?('application/msword')
     end
 
     #
     # Determines if the page is a PDF document.
     #
     # @return [Boolean]
     #   Specifies whether the page is a PDF document.
     #
     def pdf?
-      (content_type =~ /application\/pdf/) == 0
+      content_types.include?('application/pdf')
     end
 
     #
     # Determines if the page is a ZIP archive.
     #
     # @return [Boolean]
     #   Specifies whether the page is a ZIP archive.
     #
     def zip?
-      (content_type =~ /application\/zip/) == 0
+      content_types.include?('application/zip')
+    end
+
+    #
+    # The raw Cookie String sent along with the page.
+    #
+    # @return [String]
+    #   The raw Cookie from the response.
+    #
+    # @since 0.2.2
+    #
+    def cookie
+      (@response['Set-Cookie'] || '')
+    end
+
+    #
+    # The Cookie values sent along with the page.
+    #
+    # @return [Array<String>]
+    #   The Cookies from the response.
+    #
+    # @since 0.2.2
+    #
+    def cookies
+      (@headers['set-cookie'] || [])
+    end
+
+    #
+    # The Cookie key -> value pairs returned with the response.
+    #
+    # @return [Hash{String => String}]
+    #   The cookie keys and values.
+    #
+    # @since 0.2.2
+    #
+    def cookie_params
+      params = {}
+
+      cookies.each do |key_value|
+        key, value = key_value.split('=',2)
+
+        next if RESERVED_COOKIE_NAMES.include?(key)
+
+        params[key] = (value || '')
+      end
+
+      return params
     end
 
     #
     # The body of the response.
     #