=begin
    Arachni
    Copyright (c) 2010-2012 Tasos "Zapotek" Laskos

    This is free software; you can copy and distribute and modify
    this program under the term of the GPL v2.0 License
    (See LICENSE file for details)
=end

require 'ap'
require 'pp'
require 'rubygems'
require File.expand_path( File.dirname( __FILE__ ) ) + '/options'

opts = Arachni::Options.instance

require opts.dir['lib'] + 'version'
require opts.dir['lib'] + 'ruby'
require opts.dir['lib'] + 'exceptions'
require opts.dir['lib'] + 'spider'
require opts.dir['lib'] + 'parser'
require opts.dir['lib'] + 'issue'
require opts.dir['lib'] + 'module'
require opts.dir['lib'] + 'plugin'
require opts.dir['lib'] + 'audit_store'
require opts.dir['lib'] + 'http'
require opts.dir['lib'] + 'report'
require opts.dir['lib'] + 'database'
require opts.dir['lib'] + 'component_manager'
require opts.dir['mixins'] + 'progress_bar'

module Arachni

#
# The Framework class ties together all the components.
#
# It should be wrapped by a UI class.
#
# It's the brains of the operation, it bosses the rest of the classes around.
#
# It runs the audit, loads modules and reports and runs them according to
# user options.
#
# @author: Tasos "Zapotek" Laskos
#
# @version: 0.2.5
#
class Framework

    #
    # include the output interface but try to use it as little as possible
    #
    # the UI classes should take care of communicating with the user
    #
    include Arachni::UI::Output
    include Arachni::Module::Utilities
    include Arachni::Mixins::Observable

    # the version of *this* class
    REVISION = '0.2.5'

    #
    # Instance options
    #
    # @return [Options]
    #
    attr_reader :opts

    #
    # @return [Arachni::Report::Manager] report manager
    #
    attr_reader :reports

    #
    # @return [Arachni::Module::Manager] module manager
    #
    attr_reader :modules

    #
    # @return [Arachni::Plugin::Manager] plugin manager
    #
    attr_reader :plugins

    #
    # @return [Arachni::Spider] spider
    #
    attr_reader :spider

    #
    # URLs of all discovered pages
    #
    # @return [Array]
    #
    attr_reader :sitemap

    #
    # Array of URLs that have been audited
    #
    # @return [Array]
    #
    attr_reader :auditmap

    #
    # Total number of pages added to their audit queue
    #
    # @return [Integer]
    #
    attr_reader :page_queue_total_size

    #
    # Current amount of pages in the audit queue
    #
    # @return [Integer]
    #
    attr_reader :page_queue_size

    #
    # Total number of urls added to their audit queue
    #
    # @return [Integer]
    #
    attr_reader :url_queue_total_size

    #
    # Current amount of urls in the audit queue
    #
    # @return [Integer]
    #
    attr_reader :url_queue_size

    #
    # Initializes system components.
    #
    # Note that this sets the process-wide default encodings to BINARY.
    #
    # @param    [Options]    opts
    #
    def initialize( opts )

        Encoding.default_external = "BINARY"
        Encoding.default_internal = "BINARY"

        @opts = opts

        @modules = Arachni::Module::Manager.new( @opts )
        @reports = Arachni::Report::Manager.new( @opts )
        @plugins = Arachni::Plugin::Manager.new( self )

        # will store full-fledged pages generated by the Trainer since these
        # may not be accessible simply by their URL
        # @page_queue = ::Arachni::Database::Queue.new
        @page_queue = Queue.new
        @page_queue_total_size = 0

        # will hold paths found by the spider in order to be converted to pages
        # and ultimately audited by the modules
        @url_queue = Queue.new
        @url_queue_total_size = 0

        prepare_cookie_jar( )
        prepare_user_agent( )

        # deep clone the redundancy rules to preserve their counter
        # for the reports
        @orig_redundant = @opts.redundant.deep_clone

        @running = false

        # identifiers (caller file paths) of everyone who asked for a pause
        @paused = []

        @plugin_store = {}
        @store = nil

        @auditmap = []
        @sitemap  = []

        @current_url = ''
    end

    #
    # @return   [Arachni::HTTP]     HTTP instance
    #
    def http
        Arachni::HTTP.instance
    end

    #
    # Prepares the framework for the audit.
    #
    # Sets the status to 'running', starts the clock and runs the plugins.
    #
    # Must be called just before calling {#audit}.
    #
    def prepare
        @running = true
        @opts.start_datetime = Time.now

        # run all plugins
        @plugins.run
    end

    #
    # Runs the system
    #
    # It parses the instance options, {#prepare}, runs the {#audit} and {#clean_up!}.
    #
    # @param   [Block]     &block  a block to call after the audit has finished
    #                                   but before running the reports
    #
    # @return   [True]
    #
    def run( &block )
        prepare

        # catch exceptions so that if something breaks down or the user opted to
        # exit the reports will still run with whatever results
        # Arachni managed to gather
        #
        # (Exception, rather than StandardError, is rescued on purpose so that
        # a user interrupt is caught as well.)
        begin
            # start the audit
            exception_jail{ audit( ) }
        rescue Exception
            # deliberately ignored, see above
        end

        clean_up!

        # a misbehaving callback must not prevent the reports from running
        begin
            block.call if block
        rescue Exception
        end

        # run reports
        if( @opts.reports && !@opts.reports.empty? )
            exception_jail{ @reports.run( audit_store( ) ) }
        end

        true
    end

    #
    # Returns the following framework stats:
    #
    # *  :requests         -- HTTP request count
    # *  :responses        -- HTTP response count
    # *  :time_out_count   -- Amount of timed-out requests
    # *  :time             -- Amount of running time
    # *  :avg              -- Average requests per second
    # *  :sitemap_size     -- Number of discovered pages
    # *  :auditmap_size    -- Number of audited pages
    # *  :progress         -- Progress percentage
    # *  :curr_res_time    -- Average response time for the current burst of requests
    # *  :curr_res_cnt     -- Amount of responses for the current burst
    # *  :curr_avg         -- Average requests per second for the current burst
    # *  :average_res_time -- Average response time
    # *  :max_concurrency  -- Current maximum concurrency of HTTP requests
    # *  :current_page     -- URL of the currently audited page
    # *  :eta              -- Estimated time of arrival i.e. estimated remaining time
    #
    # @param    [Bool]  refresh_time    updates the running time of the audit
    #                                       (useful when you want stats while paused without messing with the clocks)
    #
    # @param    [Bool]  override_refresh
    #
    # @return   [Hash]
    #
    def stats( refresh_time = false, override_refresh = false )
        req_cnt = http.request_count
        res_cnt = http.response_count

        @opts.start_datetime = Time.now if !@opts.start_datetime

        sitemap_sz  = @url_queue_total_size + @page_queue_total_size
        auditmap_sz = @auditmap.size

        if( !refresh_time || auditmap_sz == sitemap_sz ) && !override_refresh
            @opts.delta_time ||= Time.now - @opts.start_datetime
        else
            @opts.delta_time = Time.now - @opts.start_datetime
        end

        curr_avg = 0
        if http.curr_res_cnt > 0 && http.curr_res_time > 0
            curr_avg = (http.curr_res_cnt / http.curr_res_time).to_i
        end

        # a zero running time would yield Infinity, whose #to_i raises
        avg = 0
        if res_cnt > 0 && @opts.delta_time > 0
            avg = ( res_cnt / @opts.delta_time ).to_i
        end

        # we need to remove URLs that lead to redirects from the sitemap
        # when calculating the progress %.
        #
        # this is because even though these URLs are valid webapp paths
        # they are not actual pages and thus can't be audited;
        # so the sitemap and auditmap will never match and the progress will
        # never get to 100% which may confuse users.
        #
        redir_sz = @spider ? @spider.redirects.size : 0

        #
        # There are 2 audit phases:
        #  * regular analysis attacks
        #  * timing attacks
        #
        # When calculating the progress % we have to take both into account,
        # however each is calculated using different criteria.
        #
        # Progress of regular attacks is calculated as:
        #     amount of audited pages / amount of all discovered pages
        #
        # However, the progress of the timing attacks is calculated as:
        #     amount of called timeout blocks / amount of total blocks
        #
        # The timing attack modules are run with the regular ones however
        # their procedures are piled up into an array of Procs
        # which are called after the regular attacks.
        #
        # So when we reach the point of needing to include their progress in
        # the overall progress percentage we'll be working with accurate
        # data regarding the total blocks, etc.
        #

        #
        # If we have timing attacks then each phase must account for half
        # of the progress.
        #
        # This is not very granular but it's good enough for now...
        #
        if Arachni::Module::Auditor.timeout_loaded_modules.size > 0
            multi = 50
        else
            multi = 100
        end

        progress = (Float( auditmap_sz ) / ( sitemap_sz - redir_sz ) ) * multi

        if Arachni::Module::Auditor.running_timeout_attacks?

            called_blocks = Arachni::Module::Auditor.timeout_audit_operations_cnt -
                Arachni::Module::Auditor.current_timeout_audit_operations_cnt

            progress += ( Float( called_blocks ) /
                Arachni::Module::Auditor.timeout_audit_operations_cnt ) * multi
        end

        # the divisions above yield NaN/Infinity when nothing has been queued
        # yet; report that as no progress instead of relying on an exception
        if progress.finite?
            progress = Float( sprintf( "%.2f", progress ) )
        else
            progress = 0.0
        end

        # sometimes progress may slightly exceed 100%
        # which can cause a few strange things to happen
        progress = 100.0 if progress > 100.0

        # more redirects than queued URLs would result in a negative percentage
        progress = 0.0 if progress < 0.0

        {
            :requests         => req_cnt,
            :responses        => res_cnt,
            :time_out_count   => http.time_out_count,
            :time             => audit_store.delta_time,
            :avg              => avg,
            :sitemap_size     => @sitemap.size,
            :auditmap_size    => auditmap_sz,
            :progress         => progress,
            :curr_res_time    => http.curr_res_time,
            :curr_res_cnt     => http.curr_res_cnt,
            :curr_avg         => curr_avg,
            :average_res_time => http.average_res_time,
            :max_concurrency  => http.max_concurrency,
            :current_page     => @current_url,
            :eta              => ::Arachni::Mixins::ProgressBar.eta( progress, @opts.start_datetime )
        }
    end

    #
    # Pushes a page to the page audit queue and updates {#page_queue_total_size}
    #
    def push_to_page_queue( page )
        @page_queue << page
        @page_queue_total_size += 1
    end

    #
    # Pushes a URL to the URL audit queue and updates {#url_queue_total_size}
    #
    def push_to_url_queue( url )
        @url_queue << url
        @url_queue_total_size += 1
    end

    #
    # Performs the audit
    #
    # Runs the spider, pushes each page or url to their respective audit queue,
    # calls {#audit_queue}, runs the timeout attacks ({Arachni::Module::Auditor.timeout_audit_run}) and finally re-runs
    # {#audit_queue} in case the timing attacks uncovered a new page.
    #
    def audit
        wait_if_paused

        @spider = Arachni::Spider.new( @opts )

        # if we're restricted to a given list of paths there's no reason to run the spider
        if @opts.restrict_paths && !@opts.restrict_paths.empty?
            @sitemap = @opts.restrict_paths
            @sitemap.each { |url| push_to_url_queue( url_sanitize( url ) ) }
        else
            # initiates the crawl
            @spider.run( false ) {
                |response|
                @sitemap |= @spider.sitemap
                push_to_url_queue( url_sanitize( response.effective_url ) )
            }
        end

        audit_queue

        exception_jail {
            if !Arachni::Module::Auditor.timeout_audit_blocks.empty?
                print_line
                print_status( 'Running timing attacks.' )
                print_info( '---------------------------------------' )

                # keep the "current page" stat pointing at the element
                # which is being attacked
                Arachni::Module::Auditor.on_timing_attacks {
                    |res, elem|
                    @current_url = elem.action if !elem.action.empty?
                }
                Arachni::Module::Auditor.timeout_audit_run
            end

            audit_queue
        }
    end

    #
    # Audits the URL and Page queues
    #
    def audit_queue

        # goes through the URLs discovered by the spider, repeats the request
        # and parses the responses into page objects
        #
        # yes...repeating the request is wasteful but we can't store the
        # responses of the spider to consume them here because there's no way
        # of knowing how big the site will be.
        #
        while( !@url_queue.empty? && url = @url_queue.pop )

            http.get( url, :remove_id => true ).on_complete {
                |res|

                page = Arachni::Parser::Page.from_http_response( res, @opts )

                # audit the page
                exception_jail{ run_mods( page ) }

                # don't let the page queue build up,
                # consume it as soon as possible because the pages are stored
                # in the FS and thus take up precious system resources
                audit_page_queue
            }

            harvest_http_responses if !@opts.http_harvest_last
        end

        harvest_http_responses if( @opts.http_harvest_last )

        audit_page_queue

        harvest_http_responses if( @opts.http_harvest_last )
    end

    #
    # Audits the page queue
    #
    def audit_page_queue
        # this will run until no new elements appear for the given page
        while( !@page_queue.empty? && page = @page_queue.pop )

            # audit the page
            exception_jail{ run_mods( page ) }

            harvest_http_responses if !@opts.http_harvest_last
        end
    end

    #
    # Returns the results of the audit as an {AuditStore} instance
    #
    # @see AuditStore
    #
    # @param    [Bool]  fresh   when false, a previously built store (if any)
    #                               is returned instead of building a new one
    #
    # @return    [AuditStore]
    #
    def audit_store( fresh = true )

        # restore the original redundancy rules and their counters
        @opts.redundant = @orig_redundant

        options_hash = @opts.to_h
        options_hash['mods'] = @modules.keys

        return @store if !fresh && @store

        @store = AuditStore.new( {
            :version  => version( ),
            :revision => REVISION,
            :options  => options_hash,
            :sitemap  => audit_store_sitemap || [],
            :issues   => @modules.results( ).deep_clone,
            :plugins  => @plugin_store
        })
    end
    alias :auditstore :audit_store

    #
    # Special sitemap for the auditstore.
    #
    # Used only under special circumstances, will usually return the {#sitemap}
    # but can be overridden by the {::Arachni::RPC::Framework}.
    #
    # @return   [Array]
    #
    def audit_store_sitemap
        @override_sitemap && !@override_sitemap.empty? ? @override_sitemap : @sitemap
    end

    #
    # Adds an object to the plugin store.
    #
    # Should only be called once, if an entry for a plugin already exists
    # it will just return.
    #
    # @param    [String]    plugin   plugin/owner name
    # @param    [Object]    obj      object to store
    #
    def plugin_store( plugin, obj )
        name = ''

        # resolve the name under which the plugin's class has been registered
        @plugins.each_pair {
            |k, v|

            if plugin.class.name == v.name
                name = k
                break
            end
        }

        return if @plugin_store[name]

        @plugin_store[name] = {
            :results => obj
        }.merge( plugin.class.info )
    end

    #
    # Returns an array of hashes with information
    # about all available modules
    #
    # @return    [Array]
    #
    def lsmod
        @modules.available.map {
            |name|

            path = @modules.name_to_path( name )
            next if !lsmod_match?( path )

            info = @modules[name].info
            info.merge(
                :mod_name => name,
                :author   => [info[:author]].flatten.map { |a| a.strip },
                :path     => path.strip
            )
        }.compact
    ensure
        # unload whatever had to be loaded in order to get the info
        @modules.clear
    end

    #
    # Returns an array of hashes with information
    # about all available reports
    #
    # @return    [Array]
    #
    def lsrep
        @reports.available.map {
            |report|

            path = @reports.name_to_path( report )
            next if !lsrep_match?( path )

            info = @reports[report].info
            info.merge(
                :rep_name => report,
                :path     => path,
                :author   => [info[:author]].flatten.map { |a| a.strip }
            )
        }.compact
    ensure
        # unload whatever had to be loaded in order to get the info
        @reports.clear
    end

    #
    # Returns an array of hashes with information
    # about all available plugins
    #
    # @return    [Array]
    #
    def lsplug
        @plugins.available.map {
            |plugin|

            path = @plugins.name_to_path( plugin )
            next if !lsplug_match?( path )

            info = @plugins[plugin].info
            info.merge(
                :plug_name => plugin,
                :path      => path,
                :author    => [info[:author]].flatten.map { |a| a.strip }
            )
        }.compact
    ensure
        # unload whatever had to be loaded in order to get the info
        @plugins.clear
    end

    #
    # @return   [Bool]  true if the framework is running
    #
    def running?
        @running
    end

    #
    # @return   [Bool]  true if the framework is paused or in the process of
    #                       pausing
    #
    def paused?
        !@paused.empty?
    end

    #
    # @return   [True]  pauses the framework on a best effort basis,
    #                       might take a while to take effect
    #
    def pause!
        @spider.pause! if @spider

        # remember who asked for the pause so that only they can lift it
        @paused << caller
        true
    end

    #
    # @return   [True]  resumes the scan/audit
    #
    def resume!
        @paused.delete( caller )
        @spider.resume! if @spider
        true
    end

    #
    # Returns the version of the framework
    #
    # @return    [String]
    #
    def version
        Arachni::VERSION
    end

    #
    # Returns the revision of the {Framework} (this) class
    #
    # @return    [String]
    #
    def revision
        REVISION
    end

    #
    # Cleans up the framework; should be called after running the audit or
    # after canceling a running scan.
    #
    # It stops the clock, waits for the plugins to finish up, register
    # their results and also refreshes the auditstore.
    #
    # It also runs {#audit_queue} in case any new pages have been added by the plugins.
    #
    # @param    [Bool]      skip_audit_queue    skips running {#audit_queue},
    #                                               set to true if you don't want any delays.
    #
    # @return   [True]
    #
    def clean_up!( skip_audit_queue = false )
        @opts.finish_datetime = Time.now

        # the clock may never have been started (i.e. {#prepare} wasn't called),
        # in which case the audit took no time at all
        started_at = @opts.start_datetime || @opts.finish_datetime
        @opts.delta_time = @opts.finish_datetime - started_at

        # make sure this is disabled or it'll break report output
        @@only_positives = false

        @running = false

        # wait for the plugins to finish
        @plugins.block!

        # a plug-in may have updated the page queue, rock it!
        audit_queue if !skip_audit_queue

        # refresh the audit store
        audit_store( true )

        true
    end

    private

    #
    # Identifies whoever called the method which called this one.
    #
    # @return   [String, nil]   file path taken from the relevant backtrace
    #                               entry or nil if it couldn't be determined
    #
    def caller
        if /^(.+?):(\d+)(?::in `(.*)')?/ =~ ::Kernel.caller[1]
            return Regexp.last_match[1]
        end
    end

    #
    # Blocks, checking once per second, for as long as the framework is paused.
    #
    def wait_if_paused
        while( paused? )
            ::IO::select( nil, nil, nil, 1 )
        end
    end

    #
    # Prepares the user agent to be used throughout the system.
    #
    def prepare_user_agent
        if( !@opts.user_agent )
            @opts.user_agent = 'Arachni/' + version( )
        end

        if( @opts.authed_by )
            authed_by         = " (Scan authorized by: #{@opts.authed_by})"
            @opts.user_agent += authed_by
        end
    end

    #
    # Makes sure that the cookie-jar file, if one has been specified, exists.
    #
    # @raise    [Arachni::Exceptions::NoCookieJar]  if the file doesn't exist
    #
    def prepare_cookie_jar(  )
        return if !@opts.cookie_jar || !@opts.cookie_jar.is_a?( String )

        # make sure that the provided cookie-jar file exists
        if !File.exist?( @opts.cookie_jar )
            raise( Arachni::Exceptions::NoCookieJar,
                'Cookie-jar \'' + @opts.cookie_jar + '\' doesn\'t exist.' )
        end
    end

    #
    # Takes care of page audit and module execution
    #
    # It will audit one page at a time as discovered by the spider
    # and recursively check for new elements that may have
    # appeared during the audit.
    #
    # When no new elements appear the recursion will stop and a new page
    # will be accepted.
    #
    # @see Page
    #
    # @param    [Page]    page
    #
    def run_mods( page )
        return if !page

        print_line
        print_status( "Auditing: [HTTP: #{page.code}] " + page.url )

        call_on_run_mods( page.deep_clone )

        @current_url = page.url.to_s

        # each module gets its own copy of the page
        @modules.values.each {
            |mod|
            wait_if_paused
            run_mod( mod, page.deep_clone )
        }

        @auditmap << page.url

        # Array#| already removes duplicates
        @sitemap |= @auditmap

        if( !@opts.http_harvest_last )
            harvest_http_responses( )
        end
    end

    #
    # Runs the queued HTTP requests and pushes the pages flushed by the HTTP
    # trainer, both before and after the run, to the page queue.
    #
    def harvest_http_responses

        print_status( 'Harvesting HTTP responses...' )
        print_info( 'Depending on server responsiveness and network' +
            ' conditions this may take a while.' )

        # grab updated pages
        http.trainer.flush_pages.each { |page| push_to_page_queue( page ) }

        # run all the queued HTTP requests and harvest the responses
        http.run

        http.trainer.flush_pages.each { |page| push_to_page_queue( page ) }
    end

    #
    # Passes a page to the module and runs it.
    # It also handles any exceptions thrown by the module at runtime.
    #
    # @see Page
    #
    # @param    [Class]   mod      the module to run
    # @param    [Page]    page
    #
    def run_mod( mod, page )
        return if !run_mod?( mod, page )

        begin
            @modules.run_one( mod, page, self )
        rescue SystemExit
            # let exit requests through
            raise
        rescue Exception => e
            print_error( 'Error in ' + mod.to_s + ': ' + e.to_s )
            print_error_backtrace( e )
        end
    end

    #
    # Determines whether or not to run the module against the given page
    # depending on which elements exist in the page, which elements the module
    # is configured to audit and user options.
    #
    # @param    [Class]   mod      the module to run
    # @param    [Page]    page
    #
    # @return   [Bool]
    #
    def run_mod?( mod, page )
        # modules which don't declare any elements are always run
        return true if( !mod.info[:elements] || mod.info[:elements].empty? )

        elems = {
            Issue::Element::LINK   => page.links && page.links.size > 0 && @opts.audit_links,
            Issue::Element::FORM   => page.forms && page.forms.size > 0 && @opts.audit_forms,
            Issue::Element::COOKIE => page.cookies && page.cookies.size > 0 && @opts.audit_cookies,
            Issue::Element::HEADER => page.headers && page.headers.size > 0 && @opts.audit_headers,
            Issue::Element::BODY   => true,
            Issue::Element::PATH   => true,
            Issue::Element::SERVER => true,
        }

        elems.each_pair {
            |elem, expr|
            return true if mod.info[:elements].include?( elem ) && expr
        }

        false
    end

    # @return   [Bool]  true if the path matches all of the lsrep filters
    def lsrep_match?( path )
        regexp_array_match( @opts.lsrep, path )
    end

    # @return   [Bool]  true if the path matches all of the lsmod filters
    def lsmod_match?( path )
        regexp_array_match( @opts.lsmod, path )
    end

    # @return   [Bool]  true if the path matches all of the lsplug filters
    def lsplug_match?( path )
        regexp_array_match( @opts.lsplug, path )
    end

    #
    # @param    [Array<Regexp>]     regexps
    # @param    [String]            str
    #
    # @return   [Bool]  true if the string matches every one of the regexps
    #                       (or there are no regexps), false otherwise
    #
    def regexp_array_match( regexps, str )
        regexps.all? { |filter| str =~ filter }
    end

end

end