spec/arachni/spider_spec.rb in arachni-0.4.1.3 vs spec/arachni/spider_spec.rb in arachni-0.4.2

- old
+ new

@@ -11,130 +11,220 @@
        reset_options
        @opts.url = @url
        Arachni::HTTP.instance.reset
    end

-   it 'should avoid infinite loops' do
+   it 'supports HTTPS' do
+       @opts.url = (server_url_for :spider_https).gsub( 'http', 'https' )
+       spider = Arachni::Spider.new
+
+       spider.run.size.should == 3
+       spider.redirects.size.should == 2
+   end
+
+   it 'avoids infinite loops' do
        @opts.url = @url + 'loop'
        sitemap = Arachni::Spider.new.run
        expected = [ @opts.url, @opts.url + '_back' ]
        (sitemap & expected).sort.should == expected.sort
    end

-   it 'should preserve cookies' do
+   it 'preserves cookies' do
        @opts.url = @url + 'with_cookies'
        Arachni::Spider.new.run.
            include?( @url + 'with_cookies3' ).should be_true
    end

-   it 'should not follow redirections to foreign domains' do
+   it 'ignores redirections to foreign domains' do
        @opts.url = @url + 'foreign_domain'
        Arachni::Spider.new.run.should == [ @opts.url ]
    end

-   describe '#new' do
-       it 'should be initialized using the passed options' do
+   context 'when unable to get a response for the given URL' do
+       context 'due to a network error' do
+           it 'returns an empty sitemap and have failures' do
+               @opts.url = 'http://blahaha'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+               s.run.should be_empty
+               s.failures.should be_any
+           end
+       end
+
+       context 'due to a server error' do
+           it 'returns an empty sitemap and have failures' do
+               @opts.url = @url + '/fail'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+               s.run.should be_empty
+               s.failures.should be_any
+           end
+       end
+
+       it "retries #{Arachni::Spider::MAX_TRIES} times" do
+           @opts.url = @url + '/fail_4_times'
+
+           s = Arachni::Spider.new( @opts )
+
+           s.url.should == @opts.url
+           s.run.should be_any
+       end
+   end
+
+   describe '#failures' do
+       context 'when there are no failed requests' do
+           it 'returns an empty array' do
+               s = Arachni::Spider.new( @opts )
+               s.run.should be_any
+               s.failures.should be_empty
+           end
+       end
+       context 'when there are failed requests' do
+           it 'returns an array containing the failed URLs' do
+               @opts.url = 'http://blahaha/'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+
+               s.run.should be_empty
+               s.failures.should be_any
+               s.failures.should include( @opts.url )
+           end
+       end
+   end
+
+
+   describe '.new' do
+       it 'initializes it using the passed options' do
            Arachni::Spider.new( @opts ).url.should == @url
        end
        context 'when called without params' do
-           it 'should default to Arachni::Options.instance' do
+           it 'defaults to Arachni::Options.instance' do
                Arachni::Spider.new.url.should == @url
            end
        end
        context 'when the <extend_paths> option has been set' do
-           it 'should add those paths to be followed' do
+           it 'adds those paths to be followed' do
                @opts.extend_paths = %w(some_path)
                s = Arachni::Spider.new
                s.paths.sort.should == ([@url] | [@url + @opts.extend_paths.first]).sort
            end
        end
    end

    describe '#opts' do
-       it 'should return the init options' do
+       it 'returns the init options' do
            Arachni::Spider.new.opts.should == @opts
        end
    end

    describe '#redirects' do
-       it 'should hold an array of requested URLs that caused a redirect' do
+       it 'holds an array of requested URLs that caused a redirect' do
            @opts.url = @url + 'redirect'
            s = Arachni::Spider.new
            s.run
            s.redirects.should == [ s.url ]
        end
    end

    describe '#url' do
-       it 'should return the seed URL' do
+       it 'returns the seed URL' do
            Arachni::Spider.new.url.should == @url
        end
    end

    describe '#sitemap' do
        context 'when just initialized' do
-           it 'should be empty' do
+           it 'is empty' do
                Arachni::Spider.new.sitemap.should be_empty
            end
        end
        context 'after a crawl' do
-           it 'should return a list of crawled URLs' do
+           it 'returns a list of crawled URLs' do
                s = Arachni::Spider.new
                s.run
                s.sitemap.include?( @url ).should be_true
            end
        end
    end

    describe '#fancy_sitemap' do
        context 'when just initialized' do
-           it 'should be empty' do
+           it 'is empty' do
                spider = Arachni::Spider.new
                spider.fancy_sitemap.should be_empty
            end
        end
        context 'after a crawl' do
-           it 'should return a hash of crawled URLs with their HTTP response codes' do
+           it 'returns a hash of crawled URLs with their HTTP response codes' do
                spider = Arachni::Spider.new
                spider.run
                spider.fancy_sitemap.include?( @url ).should be_true
                spider.fancy_sitemap[@url].should == 200
                spider.fancy_sitemap[@url + 'this_does_not_exist' ].should == 404
            end
        end
    end

    describe '#run' do
+       it 'performs the crawl' do
+           @opts.url = @url + '/lots_of_paths'
+
+           spider = Arachni::Spider.new
+           spider.run.size.should == 10051
+       end
+
+       it 'ignores path parameters' do
+           @opts.url = @url + '/path_params'
+
+           spider = Arachni::Spider.new
+           spider.run.select { |url| url.include?( '/something' ) }.size.should == 1
+       end
+
+       context 'Options.exclude_pages' do
+           it 'skips pages which match the configured patterns' do
+               @opts.exclude_pages = /skip me/i
+               @opts.url = @url + '/skip'
+
+               Arachni::Spider.new.run.should be_empty
+           end
+       end
+
        context 'Options.do_not_crawl' do
-           it 'should not crawl the site' do
+           it 'does not crawl the site' do
                @opts.do_not_crawl
                Arachni::Spider.new.run.should be_nil
            end
            context 'when crawling is then enabled using Options.crawl' do
-               it 'should perform a crawl' do
+               it 'performs a crawl' do
                    @opts.crawl
                    Arachni::Spider.new.run.should be_any
                end
            end
        end
        context 'Options.auto_redundant' do
            describe 5 do
-               it 'should only crawl 5 URLs with identical query parameter names' do
+               it 'only crawls 5 URLs with identical query parameter names' do
                    @opts.auto_redundant = 5
                    @opts.url += 'auto-redundant'
                    Arachni::Spider.new.run.size.should == 11
                end
            end
        end
        context 'when the link-count-limit option has been set' do
-           it 'should follow only a <link-count-limit> amount of paths' do
+           it 'follows only a <link-count-limit> amount of paths' do
                @opts.link_count_limit = 1
                spider = Arachni::Spider.new
                spider.run.should == spider.sitemap
                spider.sitemap.should == [@url]
@@ -143,11 +233,11 @@
                spider.run.should == spider.sitemap
                spider.sitemap.size.should == 2
            end
        end
        context 'when redundant rules have been set' do
-           it 'should follow the matching paths the specified amounts of time' do
+           it 'follows the matching paths the specified amounts of time' do
                @opts.url = @url + '/redundant'
                @opts.redundant = { 'redundant' => 2 }
                spider = Arachni::Spider.new
                spider.run.select { |url| url.include?( 'redundant' ) }.size.should == 2
@@ -156,88 +246,82 @@
                spider = Arachni::Spider.new
                spider.run.select { |url| url.include?( 'redundant' ) }.size.should == 3
            end
        end
        context 'when called without parameters' do
-           it 'should perform a crawl and return the sitemap' do
+           it 'performs a crawl and return the sitemap' do
                spider = Arachni::Spider.new
                spider.run.should == spider.sitemap
                spider.sitemap.should be_any
            end
        end
        context 'when called with a block only' do
-           it 'should pass the block each page as visited' do
+           it 'passes the block each page as visited' do
                spider = Arachni::Spider.new
                pages = []
                spider.run { |page| pages << page }
                pages.size.should == spider.sitemap.size
                pages.first.is_a?( Arachni::Page ).should be_true
            end
        end
        context 'when a redirect that is outside the scope is encountered' do
-           it 'should be ignored' do
+           it 'is ignored' do
                @opts.url = @url + '/skip_redirect'
                spider = Arachni::Spider.new
                spider.run.should be_empty
                spider.redirects.size.should == 1
            end
        end

-       it 'should follow relative redirect locations' do
+       it 'follows relative redirect locations' do
            @opts.url = @url + '/relative_redirect'
            @opts.redirect_limit = -1
            spider = Arachni::Spider.new
            spider.run.select { |url| url.include?( 'stacked_redirect4' ) }.should be_any
        end

-       it 'should follow stacked redirects' do
+       it 'follows stacked redirects' do
            @opts.url = @url + '/stacked_redirect'
            @opts.redirect_limit = -1
            spider = Arachni::Spider.new
            spider.run.select { |url| url.include?( 'stacked_redirect4' ) }.should be_any
        end

-       it 'should not follow stacked redirects that exceed the limit' do
+       it 'ignores stacked redirects that exceed the limit' do
            @opts.url = @url + '/stacked_redirect'
            @opts.redirect_limit = 3
            spider = Arachni::Spider.new
            spider.run.size.should == 3
        end
+
        context 'when called with options and a block' do
            describe :pass_pages_to_block do
                describe true do
-                   it 'should pass the block each page as visited' do
+                   it 'passes the block each page as visited' do
                        spider = Arachni::Spider.new
                        pages = []
                        spider.run( true ) { |page| pages << page }
                        pages.size.should == spider.sitemap.size
                        pages.first.is_a?( Arachni::Page ).should be_true
                    end
                end
                describe false do
-                   it 'should pass the block each HTTP response as received' do
+                   it 'passes the block each HTTP response as received' do
                        spider = Arachni::Spider.new
                        responses = []
                        spider.run( false ) { |res| responses << res }
                        responses.size.should == spider.sitemap.size
                        responses.first.is_a?( Typhoeus::Response ).should be_true
                    end
                end
            end
        end
-
-       it 'should ignore path parameters' do
-           @opts.url = @url + '/path_params'
-
-           spider = Arachni::Spider.new
-           spider.run.select { |url| url.include?( '/something' ) }.size.should == 1
-       end
        end
    end

    describe '#on_each_page' do
-       it 'should be passed each page as visited' do
+       it 'is passed each page as visited' do
            pages = []
            pages2 = []

            s = Arachni::Spider.new
@@ -252,11 +336,11 @@
            pages.first.is_a?( Arachni::Page ).should be_true
        end
    end

    describe '#on_each_response' do
-       it 'should be passed each response as received' do
+       it 'is passed each response as received' do
            responses = []
            responses2 = []

            s = Arachni::Spider.new
@@ -271,11 +355,11 @@
            responses.first.is_a?( Typhoeus::Response ).should be_true
        end
    end

    describe '#on_complete' do
-       it 'should be called once the crawl it done' do
+       it 'is called once the crawl it done' do
            s = Arachni::Spider.new
            called = false
            called2 = false
            s.on_complete { called = true }.should == s
            s.on_complete { called2 = true }.should == s
@@ -284,11 +368,11 @@
            called.should be_true
        end
    end

    describe '#push' do
-       it 'should push paths for the crawler to follow' do
+       it 'pushes paths for the crawler to follow' do
            s = Arachni::Spider.new
            path = @url + 'a_pushed_path'
            s.push( path )
            s.paths.include?( path ).should be_true
            s.run
@@ -302,11 +386,11 @@
            s.run
            (s.paths & paths).should be_empty
            (s.sitemap & paths).sort.should == paths.sort
        end

-       it 'should normalize and follow the pushed paths' do
+       it 'normalizes and follow the pushed paths' do
            s = Arachni::Spider.new
            p = 'some-path blah! %&$'
            wp = 'another weird path %"&*[$)'
            nwp = Arachni::Module::Utilities.to_absolute( wp )
@@ -329,58 +413,83 @@
        #end
    end

    describe '#done?' do
        context 'when not running' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                s.done?.should be_false
            end
        end
        context 'when running' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                Thread.new{ s.run }
                s.done?.should be_false
            end
        end
        context 'when it has finished' do
-           it 'should return true' do
+           it 'returns true' do
                s = Arachni::Spider.new
                s.run
                s.done?.should be_true
            end
        end
    end

+   describe '#running?' do
+       context 'when not running' do
+           it 'returns false' do
+               s = Arachni::Spider.new
+               s.running?.should be_false
+           end
+       end
+       context 'when running' do
+           it 'returns false' do
+               @opts.url = server_url_for( :auditor ) + '/sleep'
+               s = Arachni::Spider.new
+               Thread.new{ s.run }
+               sleep 1
+               s.running?.should be_true
+           end
+       end
+       context 'when it has finished' do
+           it 'returns true' do
+               s = Arachni::Spider.new
+               s.run
+               s.running?.should be_false
+           end
+       end
+   end
+
    describe '#pause' do
-       it 'should pause a running crawl' do
+       it 'pauses a running crawl' do
            s = Arachni::Spider.new
            Thread.new{ s.run }
            s.pause
            sleep 1
            s.sitemap.should be_empty
        end
    end

    describe '#paused?' do
        context 'when the crawl is not paused' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                s.paused?.should be_false
            end
        end
        context 'when the crawl is paused' do
-           it 'should return true' do
+           it 'returns true' do
                s = Arachni::Spider.new
                s.pause
                s.paused?.should be_true
            end
        end
    end

    describe '#resume' do
-       it 'should resume a paused crawl' do
+       it 'resumes a paused crawl' do
            @opts.url = @url + 'sleep'
            s = Arachni::Spider.new
            s.pause
            Thread.new{ s.run }
            sleep 1
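
For orientation, the sketch below (not part of the diff) strings together the Arachni::Spider calls exercised by the 0.4.2 specs above: run, fancy_sitemap, redirects, plus the new failures and running? helpers. The top-level require and the seed URL are illustrative assumptions; the method calls themselves mirror what appears verbatim in the spec.

    require 'arachni'    # assumed entry point; the specs load the library via their spec_helper

    opts     = Arachni::Options.instance
    opts.url = 'http://example.com/'     # hypothetical seed URL

    spider = Arachni::Spider.new( opts )

    spider.on_each_page { |page| puts page.url }    # fired for every page as it is visited
    spider.on_complete  { puts 'Crawl finished.' }  # fired once the crawl is done

    sitemap = spider.run                 # blocking; returns the list of crawled URLs

    puts spider.fancy_sitemap[opts.url]  # URL => HTTP response code (e.g. 200)
    puts spider.redirects.inspect        # requested URLs that caused a redirect
    puts spider.failures.inspect         # URLs that could not be retrieved (new in 0.4.2)
    puts spider.running?                 # false at this point; also new in 0.4.2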