spec/arachni/spider_spec.rb in arachni-0.4.1.3 vs spec/arachni/spider_spec.rb in arachni-0.4.2

- old
+ new

@@ -11,130 +11,220 @@
        reset_options
        @opts.url = @url
        Arachni::HTTP.instance.reset
    end

-   it 'should avoid infinite loops' do
+   it 'supports HTTPS' do
+       @opts.url = (server_url_for :spider_https).gsub( 'http', 'https' )
+       spider = Arachni::Spider.new
+
+       spider.run.size.should == 3
+       spider.redirects.size.should == 2
+   end
+
+   it 'avoids infinite loops' do
        @opts.url = @url + 'loop'
        sitemap = Arachni::Spider.new.run
        expected = [ @opts.url, @opts.url + '_back' ]
        (sitemap & expected).sort.should == expected.sort
    end

-   it 'should preserve cookies' do
+   it 'preserves cookies' do
        @opts.url = @url + 'with_cookies'
        Arachni::Spider.new.run.
            include?( @url + 'with_cookies3' ).should be_true
    end

-   it 'should not follow redirections to foreign domains' do
+   it 'ignores redirections to foreign domains' do
        @opts.url = @url + 'foreign_domain'
        Arachni::Spider.new.run.should == [ @opts.url ]
    end

-   describe '#new' do
-       it 'should be initialized using the passed options' do
+   context 'when unable to get a response for the given URL' do
+       context 'due to a network error' do
+           it 'returns an empty sitemap and have failures' do
+               @opts.url = 'http://blahaha'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+               s.run.should be_empty
+               s.failures.should be_any
+           end
+       end
+
+       context 'due to a server error' do
+           it 'returns an empty sitemap and have failures' do
+               @opts.url = @url + '/fail'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+               s.run.should be_empty
+               s.failures.should be_any
+           end
+       end
+
+       it "retries #{Arachni::Spider::MAX_TRIES} times" do
+           @opts.url = @url + '/fail_4_times'
+
+           s = Arachni::Spider.new( @opts )
+
+           s.url.should == @opts.url
+           s.run.should be_any
+       end
+   end
+
+   describe '#failures' do
+       context 'when there are no failed requests' do
+           it 'returns an empty array' do
+               s = Arachni::Spider.new( @opts )
+               s.run.should be_any
+               s.failures.should be_empty
+           end
+       end
+       context 'when there are failed requests' do
+           it 'returns an array containing the failed URLs' do
+               @opts.url = 'http://blahaha/'
+
+               s = Arachni::Spider.new( @opts )
+
+               s.url.should == @opts.url
+
+               s.run.should be_empty
+               s.failures.should be_any
+               s.failures.should include( @opts.url )
+           end
+       end
+   end
+
+
+   describe '.new' do
+       it 'initializes it using the passed options' do
            Arachni::Spider.new( @opts ).url.should == @url
        end
        context 'when called without params' do
-           it 'should default to Arachni::Options.instance' do
+           it 'defaults to Arachni::Options.instance' do
                Arachni::Spider.new.url.should == @url
            end
        end
        context 'when the <extend_paths> option has been set' do
-           it 'should add those paths to be followed' do
+           it 'adds those paths to be followed' do
                @opts.extend_paths = %w(some_path)
                s = Arachni::Spider.new
                s.paths.sort.should == ([@url] | [@url + @opts.extend_paths.first]).sort
            end
        end
    end

    describe '#opts' do
-       it 'should return the init options' do
+       it 'returns the init options' do
            Arachni::Spider.new.opts.should == @opts
        end
    end

    describe '#redirects' do
-       it 'should hold an array of requested URLs that caused a redirect' do
+       it 'holds an array of requested URLs that caused a redirect' do
            @opts.url = @url + 'redirect'
            s = Arachni::Spider.new
            s.run
            s.redirects.should == [ s.url ]
        end
    end

    describe '#url' do
-       it 'should return the seed URL' do
+       it 'returns the seed URL' do
            Arachni::Spider.new.url.should == @url
        end
    end

    describe '#sitemap' do
        context 'when just initialized' do
-           it 'should be empty' do
+           it 'is empty' do
                Arachni::Spider.new.sitemap.should be_empty
            end
        end
        context 'after a crawl' do
-           it 'should return a list of crawled URLs' do
+           it 'returns a list of crawled URLs' do
                s = Arachni::Spider.new
                s.run
                s.sitemap.include?( @url ).should be_true
            end
        end
    end

    describe '#fancy_sitemap' do
        context 'when just initialized' do
-           it 'should be empty' do
+           it 'is empty' do
                spider = Arachni::Spider.new
                spider.fancy_sitemap.should be_empty
            end
        end
        context 'after a crawl' do
-           it 'should return a hash of crawled URLs with their HTTP response codes' do
+           it 'returns a hash of crawled URLs with their HTTP response codes' do
                spider = Arachni::Spider.new
                spider.run
                spider.fancy_sitemap.include?( @url ).should be_true
                spider.fancy_sitemap[@url].should == 200
                spider.fancy_sitemap[@url + 'this_does_not_exist' ].should == 404
            end
        end
    end

    describe '#run' do
+       it 'performs the crawl' do
+           @opts.url = @url + '/lots_of_paths'
+
+           spider = Arachni::Spider.new
+           spider.run.size.should == 10051
+       end
+
+       it 'ignores path parameters' do
+           @opts.url = @url + '/path_params'
+
+           spider = Arachni::Spider.new
+           spider.run.select { |url| url.include?( '/something' ) }.size.should == 1
+       end
+
+       context 'Options.exclude_pages' do
+           it 'skips pages which match the configured patterns' do
+               @opts.exclude_pages = /skip me/i
+               @opts.url = @url + '/skip'
+
+               Arachni::Spider.new.run.should be_empty
+           end
+       end
+
        context 'Options.do_not_crawl' do
-           it 'should not crawl the site' do
+           it 'does not crawl the site' do
                @opts.do_not_crawl
                Arachni::Spider.new.run.should be_nil
            end
            context 'when crawling is then enabled using Options.crawl' do
-               it 'should perform a crawl' do
+               it 'performs a crawl' do
                    @opts.crawl
                    Arachni::Spider.new.run.should be_any
                end
            end
        end
        context 'Options.auto_redundant' do
            describe 5 do
-               it 'should only crawl 5 URLs with identical query parameter names' do
+               it 'only crawls 5 URLs with identical query parameter names' do
                    @opts.auto_redundant = 5
                    @opts.url += 'auto-redundant'
                    Arachni::Spider.new.run.size.should == 11
                end
            end
        end
        context 'when the link-count-limit option has been set' do
-           it 'should follow only a <link-count-limit> amount of paths' do
+           it 'follows only a <link-count-limit> amount of paths' do
                @opts.link_count_limit = 1
                spider = Arachni::Spider.new
                spider.run.should == spider.sitemap
                spider.sitemap.should == [@url]
@@ -143,11 +233,11 @@
                spider.run.should == spider.sitemap
                spider.sitemap.size.should == 2
            end
        end
        context 'when redundant rules have been set' do
-           it 'should follow the matching paths the specified amounts of time' do
+           it 'follows the matching paths the specified amounts of time' do
                @opts.url = @url + '/redundant'
                @opts.redundant = { 'redundant' => 2 }
                spider = Arachni::Spider.new
                spider.run.select { |url| url.include?( 'redundant' ) }.size.should == 2
@@ -156,88 +246,82 @@
                spider = Arachni::Spider.new
                spider.run.select { |url| url.include?( 'redundant' ) }.size.should == 3
            end
        end
        context 'when called without parameters' do
-           it 'should perform a crawl and return the sitemap' do
+           it 'performs a crawl and return the sitemap' do
                spider = Arachni::Spider.new
                spider.run.should == spider.sitemap
                spider.sitemap.should be_any
            end
        end
        context 'when called with a block only' do
-           it 'should pass the block each page as visited' do
+           it 'passes the block each page as visited' do
                spider = Arachni::Spider.new
                pages = []
                spider.run { |page| pages << page }
                pages.size.should == spider.sitemap.size
                pages.first.is_a?( Arachni::Page ).should be_true
            end
        end
        context 'when a redirect that is outside the scope is encountered' do
-           it 'should be ignored' do
+           it 'is ignored' do
                @opts.url = @url + '/skip_redirect'
                spider = Arachni::Spider.new
                spider.run.should be_empty
                spider.redirects.size.should == 1
            end
        end

-       it 'should follow relative redirect locations' do
+       it 'follows relative redirect locations' do
            @opts.url = @url + '/relative_redirect'
            @opts.redirect_limit = -1
            spider = Arachni::Spider.new
            spider.run.select { |url| url.include?( 'stacked_redirect4' ) }.should be_any
        end

-       it 'should follow stacked redirects' do
+       it 'follows stacked redirects' do
            @opts.url = @url + '/stacked_redirect'
            @opts.redirect_limit = -1
            spider = Arachni::Spider.new
            spider.run.select { |url| url.include?( 'stacked_redirect4' ) }.should be_any
        end

-       it 'should not follow stacked redirects that exceed the limit' do
+       it 'ignores stacked redirects that exceed the limit' do
            @opts.url = @url + '/stacked_redirect'
            @opts.redirect_limit = 3
            spider = Arachni::Spider.new
            spider.run.size.should == 3
        end
+
        context 'when called with options and a block' do
            describe :pass_pages_to_block do
                describe true do
-                   it 'should pass the block each page as visited' do
+                   it 'passes the block each page as visited' do
                        spider = Arachni::Spider.new
                        pages = []
                        spider.run( true ) { |page| pages << page }
                        pages.size.should == spider.sitemap.size
                        pages.first.is_a?( Arachni::Page ).should be_true
                    end
                end
                describe false do
-                   it 'should pass the block each HTTP response as received' do
+                   it 'passes the block each HTTP response as received' do
                        spider = Arachni::Spider.new
                        responses = []
                        spider.run( false ) { |res| responses << res }
                        responses.size.should == spider.sitemap.size
                        responses.first.is_a?( Typhoeus::Response ).should be_true
                    end
                end
            end
        end
-
-       it 'should ignore path parameters' do
-           @opts.url = @url + '/path_params'
-
-           spider = Arachni::Spider.new
-           spider.run.select { |url| url.include?( '/something' ) }.size.should == 1
-       end
        end
    end

    describe '#on_each_page' do
-       it 'should be passed each page as visited' do
+       it 'is passed each page as visited' do
            pages = []
            pages2 = []

            s = Arachni::Spider.new
@@ -252,11 +336,11 @@
            pages.first.is_a?( Arachni::Page ).should be_true
        end
    end

    describe '#on_each_response' do
-       it 'should be passed each response as received' do
+       it 'is passed each response as received' do
            responses = []
            responses2 = []

            s = Arachni::Spider.new
@@ -271,11 +355,11 @@
            responses.first.is_a?( Typhoeus::Response ).should be_true
        end
    end

    describe '#on_complete' do
-       it 'should be called once the crawl it done' do
+       it 'is called once the crawl it done' do
            s = Arachni::Spider.new
            called = false
            called2 = false
            s.on_complete { called = true }.should == s
            s.on_complete { called2 = true }.should == s
@@ -284,11 +368,11 @@
            called.should be_true
        end
    end

    describe '#push' do
-       it 'should push paths for the crawler to follow' do
+       it 'pushes paths for the crawler to follow' do
            s = Arachni::Spider.new
            path = @url + 'a_pushed_path'
            s.push( path )
            s.paths.include?( path ).should be_true
            s.run
@@ -302,11 +386,11 @@
            s.run
            (s.paths & paths).should be_empty
            (s.sitemap & paths).sort.should == paths.sort
        end

-       it 'should normalize and follow the pushed paths' do
+       it 'normalizes and follow the pushed paths' do
            s = Arachni::Spider.new
            p = 'some-path blah! %&$'
            wp = 'another weird path %"&*[$)'
            nwp = Arachni::Module::Utilities.to_absolute( wp )
@@ -329,58 +413,83 @@
        #end
    end

    describe '#done?' do
        context 'when not running' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                s.done?.should be_false
            end
        end
        context 'when running' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                Thread.new{ s.run }
                s.done?.should be_false
            end
        end
        context 'when it has finished' do
-           it 'should return true' do
+           it 'returns true' do
                s = Arachni::Spider.new
                s.run
                s.done?.should be_true
            end
        end
    end

+   describe '#running?' do
+       context 'when not running' do
+           it 'returns false' do
+               s = Arachni::Spider.new
+               s.running?.should be_false
+           end
+       end
+       context 'when running' do
+           it 'returns false' do
+               @opts.url = server_url_for( :auditor ) + '/sleep'
+               s = Arachni::Spider.new
+               Thread.new{ s.run }
+               sleep 1
+               s.running?.should be_true
+           end
+       end
+       context 'when it has finished' do
+           it 'returns true' do
+               s = Arachni::Spider.new
+               s.run
+               s.running?.should be_false
+           end
+       end
+   end
+
    describe '#pause' do
-       it 'should pause a running crawl' do
+       it 'pauses a running crawl' do
            s = Arachni::Spider.new
            Thread.new{ s.run }
            s.pause
            sleep 1
            s.sitemap.should be_empty
        end
    end

    describe '#paused?' do
        context 'when the crawl is not paused' do
-           it 'should return false' do
+           it 'returns false' do
                s = Arachni::Spider.new
                s.paused?.should be_false
            end
        end
        context 'when the crawl is paused' do
-           it 'should return true' do
+           it 'returns true' do
                s = Arachni::Spider.new
                s.pause
                s.paused?.should be_true
            end
        end
    end

    describe '#resume' do
-       it 'should resume a paused crawl' do
+       it 'resumes a paused crawl' do
            @opts.url = @url + 'sleep'
            s = Arachni::Spider.new
            s.pause
            Thread.new{ s.run }
            sleep 1
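
For orientation, the sketch below (not part of the diff) strings together the Arachni::Spider calls exercised by the 0.4.2 specs above: run, fancy_sitemap, redirects, plus the new failures and running? helpers. The top-level require and the seed URL are illustrative assumptions; the method calls themselves mirror what appears verbatim in the spec.

    require 'arachni'    # assumed entry point; the specs load the library via their spec_helper

    opts     = Arachni::Options.instance
    opts.url = 'http://example.com/'     # hypothetical seed URL

    spider = Arachni::Spider.new( opts )

    spider.on_each_page { |page| puts page.url }    # fired for every page as it is visited
    spider.on_complete  { puts 'Crawl finished.' }  # fired once the crawl is done

    sitemap = spider.run                 # blocking; returns the list of crawled URLs

    puts spider.fancy_sitemap[opts.url]  # URL => HTTP response code (e.g. 200)
    puts spider.redirects.inspect        # requested URLs that caused a redirect
    puts spider.failures.inspect         # URLs that could not be retrieved (new in 0.4.2)
    puts spider.running?                 # false at this point; also new in 0.4.2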