lib/spidr/agent.rb in spidr-0.1.1 vs lib/spidr/agent.rb in spidr-0.1.2

- old
+ new

@@ -50,18 +50,26 @@ def initialize(options={},&block) @proxy = (options[:proxy] || Spidr.proxy) @user_agent = (options[:user_agent] || Spidr.user_agent) @referer = options[:referer] - @host_rules = Rules.new(:accept => options[:hosts], - :reject => options[:ignore_hosts]) - @port_rules = Rules.new(:accept => options[:ports], - :reject => options[:ignore_ports]) - @link_rules = Rules.new(:accept => options[:links], - :reject => options[:ignore_links]) - @ext_rules = Rules.new(:accept => options[:exts], - :reject => options[:ignore_exts]) + @host_rules = Rules.new( + :accept => options[:hosts], + :reject => options[:ignore_hosts] + ) + @port_rules = Rules.new( + :accept => options[:ports], + :reject => options[:ignore_ports] + ) + @link_rules = Rules.new( + :accept => options[:links], + :reject => options[:ignore_links] + ) + @ext_rules = Rules.new( + :accept => options[:exts], + :reject => options[:ignore_exts] + ) @every_url_blocks = [] @urls_like_blocks = Hash.new { |hash,key| hash[key] = [] } @every_page_blocks = [] @@ -372,10 +380,16 @@ # def get_page(url,&block) host = url.host port = url.port + unless url.path.empty? + path = url.path + else + path = '/' + end + proxy_host = @proxy[:host] proxy_port = @proxy[:port] proxy_user = @proxy[:user] proxy_password = @proxy[:password] @@ -383,11 +397,11 @@ headers = {} headers['User-Agent'] = @user_agent if @user_agent headers['Referer'] = @referer if @referer - new_page = Page.new(url,sess.get(url.path,headers)) + new_page = Page.new(url,sess.get(path,headers)) block.call(new_page) if block return new_page end end @@ -461,11 +475,9 @@ @every_page_blocks.each { |page_block| page_block.call(page) } block.call(page) if block end end - - private def visit_scheme?(url) if url.scheme return SCHEMES.include?(url.scheme) else