lib/spidr/page.rb in spidr-0.5.0 vs lib/spidr/page.rb in spidr-0.6.0

- old
+ new

@@ -1,9 +1,5 @@ -require 'spidr/page/headers' -require 'spidr/page/body' -require 'spidr/page/links' - module Spidr # # Represents a requested page from a website. # class Page @@ -32,47 +28,94 @@ @headers = response.to_hash @doc = nil end # - # The meta-redirect links of the page. + # The body of the response. # - # @return [Array<String>] - # All meta-redirect links in the page. + # @return [String] + # The body of the response. # - # @deprecated - # Deprecated in 0.3.0 and will be removed in 0.4.0. - # Use {#meta_redirects} instead. + def body + (response.body || '') + end + + alias to_s body + # - def meta_redirect - STDERR.puts 'DEPRECATION: Spidr::Page#meta_redirect will be removed in 0.3.0' - STDERR.puts 'DEPRECATION: Use Spidr::Page#meta_redirects instead' + # Returns a parsed document object for HTML, XML, RSS and Atom pages. + # + # @return [Nokogiri::HTML::Document, Nokogiri::XML::Document, nil] + # The document that represents HTML or XML pages. + # Returns `nil` if the page is neither HTML, XML, RSS, Atom or if + # the page could not be parsed properly. + # + # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html + # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html + # + def doc + unless body.empty? + doc_class = if html? + Nokogiri::HTML::Document + elsif rss? || atom? || xml? || xsl? + Nokogiri::XML::Document + end - meta_redirects + if doc_class + begin + @doc ||= doc_class.parse(body, @url.to_s, content_charset) + rescue + end + end + end end # - # Determines if the response code is `300`, `301`, `302`, `303` - # or `307`. Also checks for "soft" redirects added at the page - # level by a meta refresh tag. + # Searches the document for XPath or CSS Path paths. # - # @return [Boolean] - # Specifies whether the response code is a HTTP Redirect code. + # @param [Array<String>] paths + # CSS or XPath expressions to search the document with. # - def is_redirect? - case code - when 300..303, 307 - true - when 200 - meta_redirect? + # @return [Array] + # The matched nodes from the document. + # Returns an empty Array if no nodes were matched, or if the page + # is not an HTML or XML document. + # + # @example + # page.search('//a[@href]') + # + # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000239 + # + def search(*paths) + if doc + doc.search(*paths) else - false + [] end end - alias redirect? is_redirect? + # + # Searches for the first occurrence an XPath or CSS Path expression. + # + # @return [Nokogiri::HTML::Node, Nokogiri::XML::Node, nil] + # The first matched node. Returns `nil` if no nodes could be matched, + # or if the page is not a HTML or XML document. + # + # @example + # page.at('//title') + # + # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251 + # + def at(*arguments) + if doc + doc.at(*arguments) + end + end + alias / search + alias % at + protected # # Provides transparent access to the values in {#headers}. # @@ -88,11 +131,11 @@ # @raise [NoMethodError] # The missing method did not map to a header in {#headers}. # def method_missing(name,*arguments,&block) if (arguments.empty? && block.nil?) - header_name = name.to_s.sub('_','-') + header_name = name.to_s.tr('_','-') if @response.key?(header_name) return @response[header_name] end end @@ -100,5 +143,10 @@ return super(name,*arguments,&block) end end end + +require 'spidr/page/status_codes' +require 'spidr/page/content_types' +require 'spidr/page/cookies' +require 'spidr/page/html'