page.rb in spidr-0.1.1

- old
+ new

@@ -5,10 +5,13 @@
   class Page
 
     # URL of the page
     attr_reader :url
 
+    # HTTP response object that backs the page
+    attr_reader :response
+
     # Body returned for the page
     attr_reader :body
 
     # Headers returned with the body
     attr_reader :headers
@@ -22,17 +25,89 @@
       @response = response
       @doc = nil
     end
 
     #
+    # Returns the response code from the page.
+    #
+    def code
+      @response.code # NOTE(review): a String ("200") if this is a Net::HTTPResponse — confirm
+    end
+
+    #
+    # Returns +true+ if the response code is 200, returns +false+ otherwise.
+    #
+    def is_ok?
+      code == 200
+    end
+
+    #
+    # Returns +true+ if the response code is 301 or 307, returns +false+
+    # otherwise.
+    #
+    def is_redirect?
+      (code == 301 || code == 307)
+    end
+
+    #
+    # Returns +true+ if the response code is 308, returns +false+ otherwise.
+    #
+    def timedout?
+      code == 308
+    end
+
+    #
+    # Returns +true+ if the response code is 400, returns +false+ otherwise.
+    #
+    def bad_request?
+      code == 400
+    end
+
+    #
+    # Returns +true+ if the response code is 401, returns +false+ otherwise.
+    #
+    def is_unauthorized?
+      code == 401
+    end
+
+    #
+    # Returns +true+ if the response code is 403, returns +false+ otherwise.
+    #
+    def is_forbidden?
+      code == 403
+    end
+
+    #
+    # Returns +true+ if the response code is 404, returns +false+ otherwise.
+    #
+    def is_missing?
+      code == 404
+    end
+
+    #
+    # Returns +true+ if the response code is 500, returns +false+ otherwise.
+    #
+    def had_internal_server_error?
+      code == 500
+    end
+
+    #
     # Returns the content-type of the page.
     #
     def content_type
       @response['Content-Type']
     end
 
     #
+    # Returns +true+ if the page is a plain text document, returns +false+
+    # otherwise.
+    #
+    def plain_text?
+      (content_type =~ /text\/plain/) == 0
+    end
+
+    #
     # Returns +true+ if the page is a HTML document, returns +false+
     # otherwise.
     #
     def html?
       (content_type =~ /text\/html/) == 0
@@ -77,10 +152,34 @@
     def atom?
       (content_type =~ /application\/atom\+xml/) == 0
     end
 
     #
+    # Returns +true+ if the page is a MS Word document, returns +false+
+    # otherwise.
+    #
+    def ms_word?
+      (content_type =~ /application\/msword/) == 0
+    end
+
+    #
+    # Returns +true+ if the page is a PDF document, returns +false+
+    # otherwise.
+    #
+    def pdf?
+      (content_type =~ /application\/pdf/) == 0
+    end
+
+    #
+    # Returns +true+ if the page is a ZIP archive, returns +false+
+    # otherwise.
+    #
+    def zip?
+      (content_type =~ /application\/zip/) == 0
+    end
+
+    #
     # Returns the body of the page in +String+ form.
     #
     def body
       @response.body
     end
@@ -120,37 +219,24 @@
     #
     # Converts the specified _link_ into an absolute URL
     # based on the url of the page.
     #
     def to_absolute(link)
+      # drop everything from the first '#' (the fragment), then escape the link
       link = URI.encode(link.to_s.gsub(/#.*$/,''))
-      relative = URI(link)
 
-      if relative.scheme.nil?
-        new_url = @url.clone
-
-        if relative.path[0..0] == '/'
-          new_url.path = relative.path
-        elsif relative.path[-1..-1] == '/'
-          new_url.path = File.expand_path(File.join(new_url.path,relative.path))
-        elsif !(relative.path.empty?)
-          new_url.path = File.expand_path(File.join(File.dirname(new_url.path),relative.path))
-        end
-
-        return new_url
-      end
-
-      return relative
+      relative = URI(link) # NOTE(review): presumably raises on a malformed link — confirm callers cope
+      return @url.merge(relative) # resolve against the page URL; assumes @url is a URI — confirm
     end
 
     #
     # Provides transparent access to the values in the +headers+ +Hash+.
     #
     def method_missing(sym,*args,&block)
       if (args.empty? && block.nil?)
         name = sym.id2name.sub('_','-')
 
-        return @response[name] if @response.has_key?(name)
+        return @response[name] if @response.key?(name)
       end
 
       return super(sym,*args,&block)
     end