lib/spidr/agent.rb in spidr-0.1.2 vs lib/spidr/agent.rb in spidr-0.1.3
- old
+ new
@@ -31,14 +31,15 @@
# If a _block_ is given, it will be passed the newly created
# Agent object.
#
# _options_ may contain the following keys:
# <tt>:proxy</tt>:: The proxy to use while spidering.
- # <tt>:user_agent</tt>:: the User-Agent string to send.
+ # <tt>:user_agent</tt>:: The User-Agent string to send.
# <tt>:referer</tt>:: The referer URL to send.
# <tt>:delay</tt>:: Duration in seconds to pause between spidering each
# link. Defaults to 0.
+ # <tt>:host</tt>:: The host-name to visit.
# <tt>:hosts</tt>:: An +Array+ of host patterns to visit.
# <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit.
# <tt>:ports</tt>:: An +Array+ of port patterns to visit.
# <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit.
# <tt>:links</tt>:: An +Array+ of link patterns to visit.
@@ -76,10 +77,14 @@
@delay = (options[:delay] || 0)
@history = []
@queue = []
+ if options[:host]
+ visit_hosts_like(options[:host])
+ end
+
block.call(self) if block
end
#
# Creates a new Agent object with the given _options_ and will begin
@@ -100,11 +105,11 @@
# spidering the specified host _name_. If a _block_ is given it will be
# passed the newly created Agent object, before the agent begins
# spidering.
#
def self.host(name,options={},&block)
# The host restriction is handed to the constructor through _options_:
# the old line passed a one-element <tt>:hosts</tt> Array holding
# +name.to_s+, the new line passes _name_ itself under <tt>:host</tt>.
- self.new(options.merge(:hosts => [name.to_s])) do |spider|
+ self.new(options.merge(:host => name)) do |spider|
# Hand the agent to the caller's _block_ (if any) first, then call
# +start_at+ with the <tt>http://</tt> root URL built from _name_.
block.call(spider) if block
spider.start_at("http://#{name}/")
end
end
@@ -116,11 +121,11 @@
# begins spidering.
#
def self.site(url,options={},&block)
# Convert _url_ to a String and parse it with URI() so that its host
# component can be read below.
url = URI(url.to_s)
# The host of the parsed URL is handed to the constructor through
# _options_: the old line passed it as a one-element <tt>:hosts</tt>
# Array, the new line passes it directly under <tt>:host</tt>.
- return self.new(options.merge(:hosts => [url.host])) do |spider|
+ return self.new(options.merge(:host => url.host)) do |spider|
# Hand the agent to the caller's _block_ (if any) first, then call
# +start_at+ with the parsed URI.
block.call(spider) if block
spider.start_at(url)
end
end
@@ -339,15 +344,10 @@
end
return self
end
- #
- # Returns the +Array+ of visited URLs.
- #
- def visited_urls
- @history
- end
# +visited_urls+ stays available as an alias of +history+. The removed
# method returned <tt>@history</tt> directly; presumably +history+ returns
# the same Array -- confirm against its definition.
+ alias visited_urls history
#
# Returns the +Array+ of visited URLs.
#
def visited_links