lib/spidr/page.rb in spidr-0.1.0 vs lib/spidr/page.rb in spidr-0.1.1
- old
+ new
@@ -5,10 +5,13 @@
class Page
# URL of the page
attr_reader :url
+ # HTTP Response
+ attr_reader :response
+
# Body returned for the page
attr_reader :body
# Headers returned with the body
attr_reader :headers
@@ -22,17 +25,89 @@
@response = response
@doc = nil
end
#
+ # Returns the response code from the page.
+ #
+ def code
+ @response.code
+ end
+
+ #
+ # Returns +true+ if the response code is 200, returns +false+ otherwise.
+ #
+ def is_ok?
+ code == 200
+ end
+
+ #
+ # Returns +true+ if the response code is 301 or 307, returns +false+
+ # otherwise.
+ #
+ def is_redirect?
+ (code == 301 || code == 307)
+ end
+
+ #
+ # Returns +true+ if the response code is 308, returns +false+ otherwise.
+ #
+ def timedout?
+ code == 308
+ end
+
+ #
+ # Returns +true+ if the response code is 400, returns +false+ otherwise.
+ #
+ def bad_request?
+ code == 400
+ end
+
+ #
+ # Returns +true+ if the response code is 401, returns +false+ otherwise.
+ #
+ def is_unauthorized?
+ code == 401
+ end
+
+ #
+ # Returns +true+ if the response code is 403, returns +false+ otherwise.
+ #
+ def is_forbidden?
+ code == 403
+ end
+
+ #
+ # Returns +true+ if the response code is 404, returns +false+ otherwise.
+ #
+ def is_missing?
+ code == 404
+ end
+
+ #
+ # Returns +true+ if the response code is 500, returns +false+ otherwise.
+ #
+ def had_internal_server_error?
+ code == 500
+ end
+
+ #
# Returns the value of the +Content-Type+ header of the page's response,
# as looked up on the response object.
#
def content_type
return @response['Content-Type']
end
#
+ # Returns +true+ if the page is a plain text document, returns +false+
+ # otherwise.
+ #
+ def plain_text?
+ (content_type =~ /text\/plain/) == 0
+ end
+
+ #
# Returns +true+ if the page is a HTML document, returns +false+
# otherwise.
#
def html?
(content_type =~ /text\/html/) == 0
@@ -77,10 +152,34 @@
def atom?
# true only when the content-type begins with the Atom media type;
# a missing content-type yields false
content_type.to_s.start_with?('application/atom+xml')
end
#
+ # Returns +true+ if the page is a MS Word document, returns +false+
+ # otherwise.
+ #
+ def ms_word?
+ (content_type =~ /application\/msword/) == 0
+ end
+
+ #
+ # Returns +true+ if the page is a PDF document, returns +false+
+ # otherwise.
+ #
+ def pdf?
+ (content_type =~ /application\/pdf/) == 0
+ end
+
+ #
+ # Returns +true+ if the page is a ZIP archive, returns +false+
+ # otherwise.
+ #
+ def zip?
+ (content_type =~ /application\/zip/) == 0
+ end
+
+ #
# Returns the body carried by the page's response.
#
def body
return @response.body
end
@@ -120,37 +219,24 @@
#
# Converts the specified _link_ into an absolute URL
# based on the url of the page.
#
def to_absolute(link)
+ # clean the link: drop everything from the first '#' to the end of the
+ # line (the fragment), then escape what remains with URI.encode
+ # NOTE(review): URI.encode is obsolete and was removed in Ruby 3.0 —
+ # confirm which Ruby versions this has to run on
link = URI.encode(link.to_s.gsub(/#.*$/,''))
- relative = URI(link)
- if relative.scheme.nil?
- new_url = @url.clone
-
- if relative.path[0..0] == '/'
- new_url.path = relative.path
- elsif relative.path[-1..-1] == '/'
- new_url.path = File.expand_path(File.join(new_url.path,relative.path))
- elsif !(relative.path.empty?)
- new_url.path = File.expand_path(File.join(File.dirname(new_url.path),relative.path))
- end
-
- return new_url
- end
-
- return relative
+ relative = URI(link)
+ # resolve the parsed link against the page's own URL
+ # (assumes @url is a URI object that responds to #merge — TODO confirm)
+ return @url.merge(relative)
end
#
# Provides transparent access to the values in the +headers+ +Hash+.
#
def method_missing(sym,*args,&block)
if (args.empty? && block.nil?)
name = sym.id2name.sub('_','-')
- return @response[name] if @response.has_key?(name)
+ return @response[name] if @response.key?(name)
end
return super(sym,*args,&block)
end