lib/spidr/page.rb in spidr-0.2.3 vs lib/spidr/page.rb in spidr-0.2.4

- old
+ new

@@ -59,17 +59,23 @@ end alias ok? is_ok? # - # Determines if the response code is `301` or `307`. + # Determines if the response code is `300`, `301`, `302`, `303` + # or `307`. # # @return [Boolean] - # Specifies whether the response code is `301` or `307`. + # Specifies whether the response code is a HTTP Redirect code. # def is_redirect? - (code == 301 || code == 307) + case code + when 300..303, 307 + true + else + false + end end alias redirect? is_redirect? # @@ -143,11 +149,11 @@ # # @return [String] # The Content-Type of the page. # def content_type - @response['Content-Type'] + (@response['Content-Type'] || '') end # # The content types of the page. # @@ -155,21 +161,21 @@ # The values within the Content-Type header. # # @since 0.2.2 # def content_types - @headers['content-type'] + (@headers['content-type'] || []) end # # Determines if the page is plain-text. # # @return [Boolean] # Specifies whether the page is plain-text. # def plain_text? - content_types.include?('text/plain') + is_content_type?('text/plain') end alias txt? plain_text? # @@ -177,103 +183,103 @@ # # @return [Boolean] # Specifies whether the page is HTML document. # def html? - content_types.include?('text/html') + is_content_type?('text/html') end # # Determines if the page is XML document. # # @return [Boolean] # Specifies whether the page is XML document. # def xml? - content_types.include?('text/xml') + is_content_type?('text/xml') end # # Determines if the page is XML Stylesheet (XSL). # # @return [Boolean] # Specifies whether the page is XML Stylesheet (XSL). # def xsl? - content_types.include?('text/xsl') + is_content_type?('text/xsl') end # # Determines if the page is JavaScript. # # @return [Boolean] # Specifies whether the page is JavaScript. # def javascript? - content_types.include?('text/javascript') || \ - content_types.include?('application/javascript') + is_content_type?('text/javascript') || \ + is_content_type?('application/javascript') end # # Determines if the page is a CSS stylesheet. # # @return [Boolean] # Specifies whether the page is a CSS stylesheet. # def css? - content_types.include?('text/css') + is_content_type?('text/css') end # # Determines if the page is a RSS feed. # # @return [Boolean] # Specifies whether the page is a RSS feed. # def rss? - content_types.include?('application/rss+xml') || \ - content_types.include?('application/rdf+xml') + is_content_type?('application/rss+xml') || \ + is_content_type?('application/rdf+xml') end # # Determines if the page is an Atom feed. # # @return [Boolean] # Specifies whether the page is an Atom feed. # def atom? - content_types.include?('application/atom+xml') + is_content_type?('application/atom+xml') end # # Determines if the page is a MS Word document. # # @return [Boolean] # Specifies whether the page is a MS Word document. # def ms_word? - content_types.include?('application/msword') + is_content_type?('application/msword') end # # Determines if the page is a PDF document. # # @return [Boolean] # Specifies whether the page is a PDF document. # def pdf? - content_types.include?('application/pdf') + is_content_type?('application/pdf') end # # Determines if the page is a ZIP archive. # # @return [Boolean] # Specifies whether the page is a ZIP archive. # def zip? - content_types.include?('application/zip') + is_content_type?('application/zip') end # # The raw Cookie String sent along with the page. # @@ -327,11 +333,11 @@ # # @return [String] # The body of the response. # def body - @response.body + (@response.body || '') end # # Returns a parsed document object for HTML, XML, RSS and Atom pages. # @@ -342,11 +348,11 @@ # # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html # def doc - return nil if (body.nil? || body.empty?) + return nil if body.empty? begin if html? return @doc ||= Nokogiri::HTML(body) elsif (xml? || xsl? || rss? || atom?) @@ -373,14 +379,14 @@ # # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000239 # def search(*paths) if doc - return doc.search(*paths) + doc.search(*paths) + else + [] end - - return [] end # # Searches for the first occurrence an XPath or CSS Path expression. # @@ -393,14 +399,12 @@ # # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251 # def at(*arguments) if doc - return doc.at(*arguments) + doc.at(*arguments) end - - return nil end alias / search alias % at @@ -410,11 +414,11 @@ # @return [String] # The inner-text of the title element of the page. # def title if (node = at('//title')) - return node.inner_text + node.inner_text end end # # The links from within the page. @@ -428,12 +432,11 @@ add_url = lambda { |url| urls << url unless (url.nil? || url.empty?) } - case code - when 300..303, 307 + if self.is_redirect? location = @headers['location'] if location.kind_of?(Array) # handle multiple location URLs location.each(&add_url) @@ -505,10 +508,26 @@ end protected # + # Determines if any of the content-types of the page include a given + # type. + # + # @param [String] type + # The content-type to test for. + # + # @return [Boolean] + # Specifies whether the page includes the given content-type. + # + # @since 0.2.4 + # + def is_content_type?(type) + content_types.any? { |content| content.include?(type) } + end + + # # Provides transparent access to the values in `headers`. # def method_missing(sym,*args,&block) if (args.empty? && block.nil?) name = sym.id2name.sub('_','-') @@ -516,8 +535,8 @@ return @response[name] if @response.key?(name) end return super(sym,*args,&block) end - + end end