lib/spidr/agent.rb in spidr-0.1.1 vs lib/spidr/agent.rb in spidr-0.1.2
- old
+ new
@@ -50,18 +50,26 @@
def initialize(options={},&block)
@proxy = (options[:proxy] || Spidr.proxy)
@user_agent = (options[:user_agent] || Spidr.user_agent)
@referer = options[:referer]
- @host_rules = Rules.new(:accept => options[:hosts],
- :reject => options[:ignore_hosts])
- @port_rules = Rules.new(:accept => options[:ports],
- :reject => options[:ignore_ports])
- @link_rules = Rules.new(:accept => options[:links],
- :reject => options[:ignore_links])
- @ext_rules = Rules.new(:accept => options[:exts],
- :reject => options[:ignore_exts])
+ @host_rules = Rules.new(
+ :accept => options[:hosts],
+ :reject => options[:ignore_hosts]
+ )
+ @port_rules = Rules.new(
+ :accept => options[:ports],
+ :reject => options[:ignore_ports]
+ )
+ @link_rules = Rules.new(
+ :accept => options[:links],
+ :reject => options[:ignore_links]
+ )
+ @ext_rules = Rules.new(
+ :accept => options[:exts],
+ :reject => options[:ignore_exts]
+ )
@every_url_blocks = []
@urls_like_blocks = Hash.new { |hash,key| hash[key] = [] }
@every_page_blocks = []
@@ -372,10 +380,16 @@
#
def get_page(url,&block)
host = url.host
port = url.port
+ unless url.path.empty?
+ path = url.path
+ else
+ path = '/'
+ end
+
proxy_host = @proxy[:host]
proxy_port = @proxy[:port]
proxy_user = @proxy[:user]
proxy_password = @proxy[:password]
@@ -383,11 +397,11 @@
headers = {}
headers['User-Agent'] = @user_agent if @user_agent
headers['Referer'] = @referer if @referer
- new_page = Page.new(url,sess.get(url.path,headers))
+ new_page = Page.new(url,sess.get(path,headers))
block.call(new_page) if block
return new_page
end
end
@@ -461,11 +475,9 @@
@every_page_blocks.each { |page_block| page_block.call(page) }
block.call(page) if block
end
end
-
- private
def visit_scheme?(url)
if url.scheme
return SCHEMES.include?(url.scheme)
else