lib/spidr/agent.rb in spidr-0.2.4 vs lib/spidr/agent.rb in spidr-0.2.5
- old
+ new
@@ -96,11 +96,11 @@
# for further configuration.
#
# @yieldparam [Agent] agent
# The newly created agent.
#
- def initialize(options={},&block)
+ def initialize(options={})
@host_header = options[:host_header]
@host_headers = {}
if options[:host_headers]
@host_headers.merge!(options[:host_headers])
@@ -119,11 +119,11 @@
@failures = Set[]
@queue = []
super(options)
- block.call(self) if block
+ yield self if block_given?
end
#
# Creates a new agent and begins spidering at the given URL.
#
@@ -138,13 +138,13 @@
# before it begins spidering.
#
# @yieldparam [Agent] agent
# The newly created agent.
#
- def self.start_at(url,options={},&block)
+ def self.start_at(url,options={})
self.new(options) do |spider|
- block.call(spider) if block
+ yield spider if block_given?
spider.start_at(url)
end
end
@@ -162,13 +162,13 @@
# before it begins spidering.
#
# @yieldparam [Agent] agent
# The newly created agent.
#
- def self.host(name,options={},&block)
+ def self.host(name,options={})
self.new(options.merge(:host => name)) do |spider|
- block.call(spider) if block
+ yield spider if block_given?
spider.start_at("http://#{name}/")
end
end
@@ -186,15 +186,15 @@
# before it begins spidering.
#
# @yieldparam [Agent] agent
# The newly created agent.
#
- def self.site(url,options={},&block)
+ def self.site(url,options={})
url = URI(url.to_s)
return self.new(options.merge(:host => url.host)) do |spider|
- block.call(spider) if block
+ yield spider if block_given?
spider.start_at(url)
end
end
@@ -455,15 +455,22 @@
if (!(queued?(url)) && visit?(url))
link = url.to_s
begin
- @every_url_blocks.each { |block| block.call(url) }
+ @every_url_blocks.each { |url_block| url_block.call(url) }
- @urls_like_blocks.each do |pattern,blocks|
- if ((pattern.kind_of?(Regexp) && link =~ pattern) || pattern == link || pattern == url)
- blocks.each { |url_block| url_block.call(url) }
+ @urls_like_blocks.each do |pattern,url_blocks|
+ match = case pattern
+ when Regexp
+ link =~ pattern
+ else
+ (pattern == link) || (pattern == url)
+ end
+
+ if match
+ url_blocks.each { |url_block| url_block.call(url) }
end
end
rescue Actions::Paused => action
raise(action)
rescue Actions::SkipLink
@@ -492,20 +499,20 @@
# The page for the response.
#
# @return [Page, nil]
# The page for the response, or `nil` if the request failed.
#
- def get_page(url,&block)
+ def get_page(url)
url = URI(url.to_s)
prepare_request(url) do |session,path,headers|
new_page = Page.new(url,session.get(path,headers))
# save any new cookies
@cookies.from_page(new_page)
- block.call(new_page) if block
+ yield new_page if block_given?
return new_page
end
end
#
@@ -527,20 +534,20 @@
# @return [Page, nil]
# The page for the response, or `nil` if the request failed.
#
# @since 0.2.2
#
- def post_page(url,post_data='',&block)
+ def post_page(url,post_data='')
url = URI(url.to_s)
prepare_request(url) do |session,path,headers|
new_page = Page.new(url,session.post(path,post_data,headers))
# save any new cookies
@cookies.from_page(new_page)
- block.call(new_page) if block
+ yield new_page if block_given?
return new_page
end
end
#
@@ -558,20 +565,20 @@
#
# @return [Page, nil]
# The page that was visited. If `nil` is returned, either the request
# for the page failed, or the page was skipped.
#
- def visit_page(url,&block)
+ def visit_page(url)
url = URI(url.to_s) unless url.kind_of?(URI)
get_page(url) do |page|
@history << page.url
begin
@every_page_blocks.each { |page_block| page_block.call(page) }
- block.call(page) if block
+ yield page if block_given?
rescue Actions::Paused => action
raise(action)
rescue Actions::SkipPage
return nil
rescue Actions::Action
@@ -666,11 +673,11 @@
end
begin
sleep(@delay) if @delay > 0
- block.call(@sessions[url],path,headers)
+ yield @sessions[url], path, headers
rescue SystemCallError,
Timeout::Error,
SocketError,
Net::HTTPBadResponse,
IOError
@@ -717,10 +724,10 @@
# @param [URI::HTTP] url
# The URL to add to the failures list.
#
def failed(url)
@failures << url
- @every_failed_url_blocks.each { |block| block.call(url) }
+ @every_failed_url_blocks.each { |fail_block| fail_block.call(url) }
return true
end
end
end