lib/spidr/agent.rb in spidr-0.1.2 vs lib/spidr/agent.rb in spidr-0.1.3

- old
+ new

@@ -31,14 +31,15 @@ # If a _block_ is given, it will be passed the newly created # Agent object. # # _options_ may contain the following keys: # <tt>:proxy</tt>:: The proxy to use while spidering. - # <tt>:user_agent</tt>:: the User-Agent string to send. + # <tt>:user_agent</tt>:: The User-Agent string to send. # <tt>:referer</tt>:: The referer URL to send. # <tt>:delay</tt>:: Duration in seconds to pause between spidering each # link. Defaults to 0. + # <tt>:host</tt>:: The host-name to visit. # <tt>:hosts</tt>:: An +Array+ of host patterns to visit. # <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit. # <tt>:ports</tt>:: An +Array+ of port patterns to visit. # <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit. # <tt>:links</tt>:: An +Array+ of link patterns to visit. @@ -76,10 +77,14 @@ @delay = (options[:delay] || 0) @history = [] @queue = [] + if options[:host] + visit_hosts_like(options[:host]) + end + block.call(self) if block end # # Creates a new Agent object with the given _options_ and will begin @@ -100,11 +105,11 @@ # spidering the specified host _name_. If a _block_ is given it will be # passed the newly created Agent object, before the agent begins # spidering. # def self.host(name,options={},&block) - self.new(options.merge(:hosts => [name.to_s])) do |spider| + self.new(options.merge(:host => name)) do |spider| block.call(spider) if block spider.start_at("http://#{name}/") end end @@ -116,11 +121,11 @@ # begins spidering. # def self.site(url,options={},&block) url = URI(url.to_s) - return self.new(options.merge(:hosts => [url.host])) do |spider| + return self.new(options.merge(:host => url.host)) do |spider| block.call(spider) if block spider.start_at(url) end end @@ -339,15 +344,10 @@ end return self end - # - # Returns the +Array+ of visited URLs. - # - def visited_urls - @history - end + alias visited_urls history # # Returns the +Array+ of visited URLs. # def visited_links