lib/spidr/page.rb in spidr-0.1.0 vs lib/spidr/page.rb in spidr-0.1.1

- old
+ new

#
# Post-change +Page+ members shown by this change set, restored to
# conventional formatting with review fixes applied. Members that are not
# fully visible in the change set (for example +initialize+) are not
# repeated here; this reopens the class and only defines what follows.
#
class Page

  # URL of the page
  attr_reader :url

  # HTTP Response
  attr_reader :response

  # Headers returned with the body
  attr_reader :headers

  # NOTE: no attr_reader is declared for +body+; it is defined explicitly
  # below and reads straight from the response.

  #
  # Returns the response code from the page, exactly as the response
  # object reports it.
  #
  def code
    @response.code
  end

  #
  # Returns +true+ if the response code is 200, returns +false+ otherwise.
  #
  def is_ok?
    status_code == 200
  end

  #
  # Returns +true+ if the response code is 301, 302, 303, 307 or 308,
  # returns +false+ otherwise.
  #
  def is_redirect?
    [301, 302, 303, 307, 308].include?(status_code)
  end

  #
  # Returns +true+ if the response code is 408 (Request Timeout), returns
  # +false+ otherwise.
  #
  def timedout?
    status_code == 408
  end

  #
  # Returns +true+ if the response code is 400, returns +false+ otherwise.
  #
  def bad_request?
    status_code == 400
  end

  #
  # Returns +true+ if the response code is 401, returns +false+ otherwise.
  #
  def is_unauthorized?
    status_code == 401
  end

  #
  # Returns +true+ if the response code is 403, returns +false+ otherwise.
  #
  def is_forbidden?
    status_code == 403
  end

  #
  # Returns +true+ if the response code is 404, returns +false+ otherwise.
  #
  def is_missing?
    status_code == 404
  end

  #
  # Returns +true+ if the response code is 500, returns +false+ otherwise.
  #
  def had_internal_server_error?
    status_code == 500
  end

  #
  # Returns the content-type of the page, or +nil+ when the response
  # carries no Content-Type header.
  #
  def content_type
    @response['Content-Type']
  end

  #
  # Returns +true+ if the page is a plain text document, returns +false+
  # otherwise.
  #
  def plain_text?
    media_type?('text/plain')
  end

  #
  # Returns +true+ if the page is a HTML document, returns +false+
  # otherwise.
  #
  def html?
    media_type?('text/html')
  end

  #
  # Returns +true+ if the page is an Atom feed, returns +false+ otherwise.
  #
  def atom?
    media_type?('application/atom+xml')
  end

  #
  # Returns +true+ if the page is a MS Word document, returns +false+
  # otherwise.
  #
  def ms_word?
    media_type?('application/msword')
  end

  #
  # Returns +true+ if the page is a PDF document, returns +false+
  # otherwise.
  #
  def pdf?
    media_type?('application/pdf')
  end

  #
  # Returns +true+ if the page is a ZIP archive, returns +false+
  # otherwise.
  #
  def zip?
    media_type?('application/zip')
  end

  #
  # Returns the body of the page in +String+ form.
  #
  def body
    @response.body
  end

  #
  # Converts the specified _link_ into an absolute URL
  # based on the url of the page. Any fragment (<tt>#...</tt>) is dropped
  # and unsafe characters are percent-encoded before the link is merged
  # with the page URL.
  #
  def to_absolute(link)
    # clean the link; URI.encode no longer exists on Ruby 3.0+, so the
    # RFC 2396 parser's escape is used directly.
    cleaned = uri_escaper.escape(link.to_s.gsub(/#.*$/, ''))

    @url.merge(URI(cleaned))
  end

  #
  # Provides transparent access to the values in the response headers:
  # <tt>page.x_powered_by</tt> reads the <tt>x-powered-by</tt> header.
  # Calls with arguments or a block, and names that match no header, fall
  # through to the default behaviour (+NoMethodError+).
  #
  def method_missing(sym, *args, &block)
    if args.empty? && block.nil?
      name = header_name_for(sym)

      return @response[name] if @response.key?(name)
    end

    super
  end

  #
  # Keeps +respond_to?+ consistent with +method_missing+: returns +true+
  # for names that map onto a header present in the response.
  #
  def respond_to_missing?(sym, include_private = false)
    # @response may be unset on a partially constructed page
    (!@response.nil? && @response.key?(header_name_for(sym))) || super
  end

  private

  # Integer form of +code+. The response may report the code as a String
  # such as "200" (Net::HTTP does), which never equals an Integer literal.
  def status_code
    code.to_i
  end

  # Returns +true+ if the Content-Type header starts with _prefix_.
  # The comparison ignores case and tolerates a missing header.
  def media_type?(prefix)
    content_type.to_s.downcase.start_with?(prefix)
  end

  # Maps a method name onto a header name: every underscore becomes a dash.
  def header_name_for(sym)
    sym.to_s.tr('_', '-')
  end

  # Lazily built escaper used by +to_absolute+.
  def uri_escaper
    @uri_escaper ||= URI::RFC2396_Parser.new
  end

end