lib/spidr/agent.rb in spidr-0.2.4 vs lib/spidr/agent.rb in spidr-0.2.5

- old
+ new

@@ -96,11 +96,11 @@ # for further configuration. # # @yieldparam [Agent] agent # The newly created agent. # - def initialize(options={},&block) + def initialize(options={}) @host_header = options[:host_header] @host_headers = {} if options[:host_headers] @host_headers.merge!(options[:host_headers]) @@ -119,11 +119,11 @@ @failures = Set[] @queue = [] super(options) - block.call(self) if block + yield self if block_given? end # # Creates a new agent and begin spidering at the given URL. # @@ -138,13 +138,13 @@ # before it begins spidering. # # @yieldparam [Agent] agent # The newly created agent. # - def self.start_at(url,options={},&block) + def self.start_at(url,options={}) self.new(options) do |spider| - block.call(spider) if block + yield spider if block_given? spider.start_at(url) end end @@ -162,13 +162,13 @@ # before it begins spidering. # # @yieldparam [Agent] agent # The newly created agent. # - def self.host(name,options={},&block) + def self.host(name,options={}) self.new(options.merge(:host => name)) do |spider| - block.call(spider) if block + yield spider if block_given? spider.start_at("http://#{name}/") end end @@ -186,15 +186,15 @@ # before it begins spidering. # # @yieldparam [Agent] agent # The newly created agent. # - def self.site(url,options={},&block) + def self.site(url,options={}) url = URI(url.to_s) return self.new(options.merge(:host => url.host)) do |spider| - block.call(spider) if block + yield spider if block_given? spider.start_at(url) end end @@ -455,15 +455,22 @@ if (!(queued?(url)) && visit?(url)) link = url.to_s begin - @every_url_blocks.each { |block| block.call(url) } + @every_url_blocks.each { |url_block| url_block.call(url) } - @urls_like_blocks.each do |pattern,blocks| - if ((pattern.kind_of?(Regexp) && link =~ pattern) || pattern == link || pattern == url) - blocks.each { |url_block| url_block.call(url) } + @urls_like_blocks.each do |pattern,url_blocks| + match = case pattern + when Regexp + link =~ pattern + else + (pattern == link) || (pattern == url) + end + + if match + url_blocks.each { |url_block| url_block.call(url) } end end rescue Actions::Paused => action raise(action) rescue Actions::SkipLink @@ -492,20 +499,20 @@ # The page for the response. # # @return [Page, nil] # The page for the response, or `nil` if the request failed. # - def get_page(url,&block) + def get_page(url) url = URI(url.to_s) prepare_request(url) do |session,path,headers| new_page = Page.new(url,session.get(path,headers)) # save any new cookies @cookies.from_page(new_page) - block.call(new_page) if block + yield new_page if block_given? return new_page end end # @@ -527,20 +534,20 @@ # @return [Page, nil] # The page for the response, or `nil` if the request failed. # # @since 0.2.2 # - def post_page(url,post_data='',&block) + def post_page(url,post_data='') url = URI(url.to_s) prepare_request(url) do |session,path,headers| new_page = Page.new(url,session.post(path,post_data,headers)) # save any new cookies @cookies.from_page(new_page) - block.call(new_page) if block + yield new_page if block_given? return new_page end end # @@ -558,20 +565,20 @@ # # @return [Page, nil] # The page that was visited. If `nil` is returned, either the request # for the page failed, or the page was skipped. # - def visit_page(url,&block) + def visit_page(url) url = URI(url.to_s) unless url.kind_of?(URI) get_page(url) do |page| @history << page.url begin @every_page_blocks.each { |page_block| page_block.call(page) } - block.call(page) if block + yield page if block_given? rescue Actions::Paused => action raise(action) rescue Actions::SkipPage return nil rescue Actions::Action @@ -666,11 +673,11 @@ end begin sleep(@delay) if @delay > 0 - block.call(@sessions[url],path,headers) + yield @sessions[url], path, headers rescue SystemCallError, Timeout::Error, SocketError, Net::HTTPBadResponse, IOError @@ -717,10 +724,10 @@ # @param [URI::HTTP] url # The URL to add to the failures list. # def failed(url) @failures << url - @every_failed_url_blocks.each { |block| block.call(url) } + @every_failed_url_blocks.each { |fail_block| fail_block.call(url) } return true end end end