lib/spidr/page.rb in spidr-0.5.0 vs lib/spidr/page.rb in spidr-0.6.0
- old
+ new
@@ -1,9 +1,5 @@
-require 'spidr/page/headers'
-require 'spidr/page/body'
-require 'spidr/page/links'
-
module Spidr
#
# Represents a requested page from a website.
#
class Page
@@ -32,47 +28,94 @@
@headers = response.to_hash
@doc = nil
end
#
- # The meta-redirect links of the page.
+ # The body of the response.
#
- # @return [Array<String>]
- # All meta-redirect links in the page.
+ # @return [String]
+ # The body of the response.
#
- # @deprecated
- # Deprecated in 0.3.0 and will be removed in 0.4.0.
- # Use {#meta_redirects} instead.
+ def body # returns the body of the wrapped response as a String
+ (response.body || '') # falls back to an empty String when response.body is nil
+ end
+
+ alias to_s body # to_s is a second name for body and returns the same value
+
#
- def meta_redirect
- STDERR.puts 'DEPRECATION: Spidr::Page#meta_redirect will be removed in 0.3.0'
- STDERR.puts 'DEPRECATION: Use Spidr::Page#meta_redirects instead'
+ # Returns a parsed document object for HTML, XML, RSS and Atom pages.
+ #
+ # @return [Nokogiri::HTML::Document, Nokogiri::XML::Document, nil]
+ # The document that represents HTML or XML pages.
+ # Returns `nil` if the page is not HTML, XML, RSS or Atom, or if
+ # the page could not be parsed properly.
+ #
+ # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html
+ # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
+ #
+ def doc # parses the body into a Nokogiri document and caches the result in @doc
+ unless body.empty? # an empty body is never parsed, so the method returns nil
+ doc_class = if html? # html?, rss?, atom?, xml?, xsl? are defined outside this view; presumably content-type checks
+ Nokogiri::HTML::Document
+ elsif rss? || atom? || xml? || xsl?
+ Nokogiri::XML::Document
+ end # doc_class is nil when none of the predicates matched
- meta_redirects
+ if doc_class
+ begin
+ @doc ||= doc_class.parse(body, @url.to_s, content_charset) # memoized: parse only runs while @doc is still nil
+ rescue # bare rescue: a StandardError raised by parse is swallowed and the method returns nil
+ end
+ end
+ end
end
#
- # Determines if the response code is `300`, `301`, `302`, `303`
- # or `307`. Also checks for "soft" redirects added at the page
- # level by a meta refresh tag.
+ # Searches the document with XPath or CSS Path expressions.
#
- # @return [Boolean]
- # Specifies whether the response code is a HTTP Redirect code.
+ # @param [Array<String>] paths
+ # CSS or XPath expressions to search the document with.
#
- def is_redirect?
- case code
- when 300..303, 307
- true
- when 200
- meta_redirect?
+ # @return [Array]
+ # The matched nodes from the document.
+ # Returns an empty Array if no nodes were matched, or if the page
+ # is not an HTML or XML document.
+ #
+ # @example
+ # page.search('//a[@href]')
+ #
+ # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000239
+ #
+ def search(*paths) # forwards every given path to the parsed document's own search
+ if doc # doc returns nil when there is no parsed document for this page
+ doc.search(*paths)
else
- false
+ [] # no parsed document: return an empty Array instead of nil
end
end
- alias redirect? is_redirect?
+ #
+ # Searches for the first occurrence of an XPath or CSS Path expression.
+ #
+ # @return [Nokogiri::HTML::Node, Nokogiri::XML::Node, nil]
+ # The first matched node. Returns `nil` if no nodes could be matched,
+ # or if the page is not an HTML or XML document.
+ #
+ # @example
+ # page.at('//title')
+ #
+ # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251
+ #
+ def at(*arguments) # forwards all arguments to the parsed document's own at
+ if doc # no else branch: when doc is nil the method itself returns nil
+ doc.at(*arguments)
+ end
+ end
+ alias / search
+ alias % at
+
protected
#
# Provides transparent access to the values in {#headers}.
#
@@ -88,11 +131,11 @@
# @raise [NoMethodError]
# The missing method did not map to a header in {#headers}.
#
def method_missing(name,*arguments,&block)
if (arguments.empty? && block.nil?)
- header_name = name.to_s.sub('_','-')
+ header_name = name.to_s.tr('_','-')
if @response.key?(header_name)
return @response[header_name]
end
end
@@ -100,5 +143,10 @@
return super(name,*arguments,&block)
end
end
end
+
+require 'spidr/page/status_codes'
+require 'spidr/page/content_types'
+require 'spidr/page/cookies'
+require 'spidr/page/html'