=begin
    Copyright 2010-2013 Tasos Laskos

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
=end

require 'ostruct'

module Arachni

lib = Options.dir['lib']

require lib + 'rpc/client/instance'
require lib + 'rpc/client/dispatcher'

require lib + 'rpc/server/base'
require lib + 'rpc/server/active_options'
require lib + 'rpc/server/output'
require lib + 'rpc/server/framework'

module RPC
class Server

#
# Represents an Arachni instance (or multiple instances when running a
# high-performance scan) and serves as a central point of access to the
# scanner's components:
#
# * {Instance self} -- mapped to `service`
# * {Options} -- mapped to `opts`
# * {Framework} -- mapped to `framework`
# * {Module::Manager} -- mapped to `modules`
# * {Plugin::Manager} -- mapped to `plugins`
# * {Spider} -- mapped to `spider`
#
# # Convenience methods
#
# The `service` RPC handler (which is this class) provides convenience
# methods which cover the most commonly used functionality so that you
# won't have to concern yourself with any other RPC handler.
#
# This should be the only RPC API you'll ever need.
#
# Provided methods for:
#
# * Retrieving available components
#   * {#list_modules Modules}
#   * {#list_plugins Plugins}
#   * {#list_reports Reports}
# * {#scan Configuring and running a scan}
# * Retrieving progress information
#   * {#progress in aggregate form} (which includes a multitude of information)
#   * or simply by:
#     * {#busy? checking whether the scan is still in progress}
#     * {#status checking the status of the scan}
# * {#pause Pausing}, {#resume resuming} or {#abort_and_report aborting} the scan.
# * Retrieving the scan report
#   * {#report as a Hash} or a native {#auditstore AuditStore} object
#   * {#report_as in one of the supported formats} (as made available by the
#     {Reports report} components)
# * {#shutdown Shutting down}
#
# (A nice simple example can be found in the {UI::CLI::RPC RPC command-line client}
# interface.)
#
# @example A minimalistic example -- assumes Arachni is installed and available.
#   require 'arachni'
#   require 'arachni/rpc/client'
#
#   instance = Arachni::RPC::Client::Instance.new( Arachni::Options.instance,
#                                                  'localhost:1111', 's3cr3t' )
#
#   instance.service.scan url: 'http://testfire.net',
#                         audit_links: true,
#                         audit_forms: true,
#                         # load all XSS modules
#                         modules: 'xss*'
#
#   print 'Running.'
#   while instance.service.busy?
#       print '.'
#       sleep 1
#   end
#
#   # Grab the report as a native AuditStore object
#   report = instance.service.auditstore
#
#   # Kill the instance and its process, no zombies please...
#   instance.service.shutdown
#
#   puts
#   puts
#   puts 'Logged issues:'
#   report.issues.each do |issue|
#       puts "  * #{issue.name} for input '#{issue.var}' at '#{issue.url}'."
#   end
#
# @note Ignore:
#
#   * Inherited methods and attributes -- only public methods of this class are
#     accessible over RPC.
#   * `block` parameters, they are an RPC implementation detail for methods which
#     perform asynchronous operations.
#
# @note Avoid calling methods which return Arachni-specific objects (like {AuditStore},
#   {Issue}, etc.) when you don't have these objects available on the client-side
#   (like when working from a non-Ruby platform or not having the Arachni framework
#   installed).
#
# @note Methods which expect `Symbol` type parameters will accept `String`
#   types as well.
#
#   For example, the following:
#
#       instance.service.scan url: 'http://testfire.net'
#
#   Is the same as:
#
#       instance.service.scan 'url' => 'http://testfire.net'
#
# @author Tasos "Zapotek" Laskos
#
class Instance
    include UI::Output
    include Utilities

    # NOTE(review): presumably this re-declares the mixed-in method on this
    # class so that it ends up public here (see the class notes about inherited
    # methods not being reachable over RPC) -- confirm before simplifying.
    private :error_logfile
    public  :error_logfile

    #
    # Initializes the RPC interface and the framework, then starts the server.
    #
    # @param    [Options]    opts
    # @param    [String]    token   Authentication token.
    #
    def initialize( opts, token )
        banner

        @opts  = opts
        @token = token

        @server = Base.new( @opts, token )
        @server.logger.level = @opts.datastore[:log_level] if @opts.datastore[:log_level]

        @opts.datastore[:token] = token

        debug if @opts.debug

        if @opts.reroute_to_logfile
            reroute_to_file "#{@opts.dir['logs']}/Instance - #{Process.pid}-#{@opts.rpc_port}.log"
        else
            reroute_to_file false
        end

        set_error_logfile "#{@opts.dir['logs']}/Instance - #{Process.pid}-#{@opts.rpc_port}.error.log"

        set_handlers

        # trap interrupts and exit cleanly when required
        %w(QUIT INT).each do |signal|
            trap( signal ){ shutdown } if Signal.list.has_key?( signal )
        end

        run
    end

    # @return   [true]
    def alive?
        @server.alive?
    end

    # @return   [Bool]
    #   `true` if the scan is initializing or running, `false` otherwise.
    #   If a scan is started by {#scan} then this method should be used
    #   instead of {Framework#busy?}.
    def busy?
        @scan_initializing ? true : @framework.busy?
    end

    # @param (see Arachni::RPC::Server::Framework#errors)
    # @return (see Arachni::RPC::Server::Framework#errors)
    def errors( starting_line = 0, &block )
        @framework.errors( starting_line, &block )
    end

    # @return (see Arachni::Framework#list_modules)
    def list_modules
        @framework.list_modules
    end

    # @return (see Arachni::Framework#list_plugins)
    def list_plugins
        @framework.list_plugins
    end

    # @return (see Arachni::Framework#list_reports)
    def list_reports
        @framework.list_reports
    end

    #
    # Pauses the running scan on a best effort basis.
    #
    # @see Framework#pause
    def pause( &block )
        @framework.pause( &block )
    end

    #
    # Resumes a paused scan.
    #
    # @see Framework#resume
    def resume( &block )
        @framework.resume( &block )
    end

    #
    # Cleans up and returns the report.
    #
    # @param   [Symbol] report_type
    #   Report type to return, `:hash` for {#report} or `:auditstore` for
    #   {#auditstore}.
    #
    # @return  [Hash,AuditStore]
    #
    # @note Don't forget to {#shutdown} the instance once you get the report.
    #
    # @see Framework#clean_up
    # @see #abort_and_report
    # @see #report
    # @see #auditstore
    #
    def abort_and_report( report_type = :hash, &block )
        @framework.clean_up do
            block.call report_type.to_sym == :auditstore ? auditstore : report
        end
    end

    #
    # Cleans up and delegates to {#report_as}.
    #
    # @param (see #report_as)
    # @return (see #report_as)
    #
    # @note Don't forget to {#shutdown} the instance once you get the report.
    #
    # @see Framework#clean_up
    # @see #abort_and_report
    # @see #report_as
    #
    def abort_and_report_as( name, &block )
        @framework.clean_up do
            block.call report_as( name )
        end
    end

    # @return (see Arachni::Framework#auditstore)
    # @see Framework#auditstore
    def auditstore
        @framework.auditstore
    end

    # @return (see Arachni::RPC::Server::Framework#report)
    # @see Framework#report
    def report
        @framework.report
    end

    # @param    [String]    name
    #   Name of the report component to run, as presented by {#list_reports}'s
    #   `:shortname` key.
    #
    # @return (see Arachni::Framework#report_as)
    # @see Framework#report_as
    def report_as( name )
        @framework.report_as( name )
    end

    # @return (see Framework#status)
    # @see Framework#status
    def status
        @framework.status
    end

    #
    # Simplified version of {Framework#progress}.
    #
    # # Recommended usage
    #
    # Please request from the method only the things you are going to actually
    # use, otherwise you'll just be wasting bandwidth.
    # In addition, ask to **not** be served data you already have, like issues
    # or error messages.
    #
    # To be kept completely up to date on the progress of a scan (i.e. receive
    # new issues and error messages asap) in an efficient manner, you will need
    # to keep track of the issues and error messages you already have and
    # explicitly tell the method to not send the same data back to you on
    # subsequent calls.
    #
    # ## Retrieving errors (`:errors` option) without duplicate data
    #
    # This is done by telling the method how many error messages you already
    # have and you will be served the errors from the error-log that are past
    # that line.
    # So, if you were to use a loop to get fresh progress data it would look
    # like so:
    #
    #     error_cnt = 0
    #     i = 0
    #     while sleep 1
    #         # Test method, triggers an error log...
    #         instance.service.error_test "BOOM! #{i+=1}"
    #
    #         # Only request errors we don't already have
    #         errors = instance.service.progress( with: { errors: error_cnt } )['errors']
    #         error_cnt += errors.size
    #
    #         # You will only see new errors
    #         puts errors.join("\n")
    #     end
    #
    # ## Retrieving issues without duplicate data
    #
    # In order to be served only new issues you will need to let the method
    # know which issues you already have. This is done by providing a list
    # of {Issue#digest digests} for the issues you already know about.
    #
    #     issue_digests = []
    #     while sleep 1
    #         issues = instance.service.progress(
    #                      # Ask for native Arachni::Issue object instead of hashes
    #                      with: :native_issues,
    #                      # Only request issues we don't already have
    #                      without: { issues: issue_digests }
    #         )['issues']
    #
    #         issue_digests |= issues.map( &:digest )
    #
    #         # You will only see new issues
    #         issues.each do |issue|
    #             puts "  * #{issue.name} for input '#{issue.var}' at '#{issue.url}'."
    #         end
    #     end
    #
    # _If your client is on a platform that has no access to native Arachni
    # objects, you'll have to calculate the {Issue#digest digests} yourself._
    #
    # @param  [Hash]  options
    #   Options about what progress data to retrieve and return.
    # @option options [Array]  :with
    #   Specify data to include:
    #
    #   * :native_issues -- Discovered issues as {Arachni::Issue} objects.
    #   * :issues -- Discovered issues as {Arachni::Issue#to_h hashes}.
    #   * :instances -- Statistics and info for slave instances.
    #   * :errors -- Errors and the line offset to use for {#errors}.
    #     Pass as a hash, like: `{ errors: 10 }`
    # @option options [Array]  :without
    #   Specify data to exclude:
    #
    #   * :stats -- Don't include runtime statistics.
    #   * :issues -- Don't include issues with the given {Arachni::Issue#digest digests}.
    #     Pass as a hash, like: `{ issues: [...] }`
    #
    # @return [Hash]
    #   * `stats` -- General runtime statistics (merged when part of Grid)
    #     (enabled by default)
    #   * `status` -- {#status}
    #   * `busy` -- {#busy?}
    #   * `issues` -- {Framework#issues_as_hash} or {Framework#issues}
    #     (disabled by default)
    #   * `instances` -- Raw `stats` for each running instance (only when part
    #     of Grid) (disabled by default)
    #   * `errors` -- {#errors} (disabled by default)
    #
    def progress( options = {}, &block )
        with    = parse_progress_opts( options, :with )
        without = parse_progress_opts( options, :without )

        @framework.progress(
            as_hash:  !with.include?( :native_issues ),
            issues:   with.include?( :native_issues ) || with.include?( :issues ),
            stats:    !without.include?( :stats ),
            slaves:   with.include?( :instances ),
            messages: false,
            errors:   with[:errors]
        ) do |data|
            data['instances'] ||= [] if with.include?( :instances )
            data['busy'] = busy?

            if data['issues']
                # Work on a copy so that filtering doesn't touch the
                # collection handed to us by the framework.
                data['issues'] = data['issues'].dup

                if without[:issues].is_a? Array
                    data['issues'].reject! do |i|
                        without[:issues].include?( i.is_a?( Hash ) ? i['digest'] : i.digest )
                    end
                end
            end

            block.call( data )
        end
    end

    #
    # Configures and runs a scan.
    #
    # @note If you use this method to start the scan use {#busy?} instead of
    #   {Framework#busy?} to check if the scan is still running.
    #
    # @note Options marked with an asterisk are required.
    # @note Options which expect patterns will interpret their arguments as
    #   regular expressions regardless of their type.
    # @note When using more than one Instance, the `http_req_limit` and
    #   `link_count_limit` options will be divided by the number of Instance to
    #   be used.
    #
    # @param  [Hash]  opts
    #   Scan options to be passed to {Options#set} (along with some extra ones
    #   to keep configuration in one place).
    #
    #   _The options presented here are the most commonly used ones, in
    #   actuality, you can use any {Options} attribute._
    # @option opts [String]  *:url
    #   Target URL to audit.
    # @option opts [Boolean] :audit_links (false)
    #   Enable auditing of link inputs.
    # @option opts [Boolean] :audit_forms (false)
    #   Enable auditing of form inputs.
    # @option opts [Boolean] :audit_cookies (false)
    #   Enable auditing of cookie inputs.
    # @option opts [Boolean] :audit_headers (false)
    #   Enable auditing of header inputs.
    # @option opts [String,Array] :modules ([])
    #   Modules to load, by name.
    #
    #       # To load all modules use the wildcard on its own
    #       '*'
    #
    #       # To load all XSS and SQLi modules:
    #       [ 'xss*', 'sqli*' ]
    #
    # @option opts [Hash] :plugins ({})
    #   Plugins to load, by name, along with their options.
    #
    #       {
    #           'proxy'     => {}, # empty options
    #           'autologin' => {
    #               'url'    => 'http://demo.testfire.net/bank/login.aspx',
    #               'params' => 'uid=jsmith&passw=Demo1234',
    #               'check'  => 'MY ACCOUNT'
    #           },
    #       }
    # @option opts [Integer] :link_count_limit (nil)
    #   Limit the amount of pages to be crawled and audited.
    # @option opts [Integer] :depth_limit (nil)
    #   Directory depth limit.
    #
    #   How deep Arachni should go into the website structure.
    # @option opts [Array] :exclude ([])
    #   URLs that match any of the given patterns will be ignored.
    #
    #       [ 'logout', /skip.*.me too/i ]
    #
    # @option opts [Array] :exclude_pages ([])
    #   Exclude pages from the crawl and audit processes based on their
    #   content (i.e. HTTP response bodies).
    #
    #       [ /.*forbidden.*/, "I'm a weird 404 and I should be ignored" ]
    #
    # @option opts [Array] :exclude_vectors ([])
    #   Exclude input vectors from the audit, by name.
    #
    #       [ 'sessionid', 'please_dont_audit_me' ]
    #
    # @option opts [Array] :include ([])
    #   Only URLs that match any of the given patterns will be followed and audited.
    #
    #       [ 'only-follow-me', 'follow-me-as-well' ]
    #
    # @option opts [Hash{String,Regexp=>Integer}] :redundant ({})
    #   Redundancy patterns to limit how many times certain paths should be
    #   followed.
    #
    #       { "follow_me_3_times" => 3, /follow_me_5_times/ => 5 }
    #
    #   Useful when scanning pages that create a large number of
    #   pages like galleries and calendars.
    #
    # @option opts [Array] :restrict_paths ([])
    #   Restrict the audit to the provided paths.
    #
    #   The crawl phase gets skipped and these paths are used instead.
    # @option opts [Array] :extend_paths ([])
    #   Extend the scope of the crawl and audit with the provided paths.
    # @option opts [Array] :cookies ({})
    #   Cookies to use for the HTTP requests.
    #
    #       [
    #           'secret=blah',
    #           {
    #               'userid' => '1',
    #           },
    #           {
    #               name:      'sessionid',
    #               value:     'fdfdfDDfsdfszdf',
    #               http_only: true
    #           }
    #       ]
    #
    #   _Cookie values will be encoded automatically._
    #
    # @option opts [Integer] :http_req_limit (20)
    #   HTTP request concurrency limit.
    # @option opts [String] :user_agent ('Arachni/v')
    #   User agent to use.
    # @option opts [String] :authed_by (nil)
    #   The person who authorized the scan.
    #
    #       John Doe
    #
    # @option opts [Array] :slaves **(Experimental)**
    #   Info of Instances to {Framework#enslave enslave}.
    #
    #       [
    #           { url: 'address:port', token: 's3cr3t' },
    #           { url: 'anotheraddress:port', token: '3v3nm0r3s3cr3t' }
    #       ]
    #
    # @option opts [Bool] :grid (false) **(Experimental)**
    #   Utilise the Dispatcher Grid to obtain slave instances for a
    #   high-performance distributed scan.
    # @option opts [Integer] :spawns (0) **(Experimental)**
    #   The amount of slaves to spawn.
    # @option opts [Array] :pages ([]) **(Experimental)**
    #   Extra pages to audit.
    # @option opts [Array] :elements ([]) **(Experimental)**
    #   Elements to which to restrict the audit.
    #
    #   Using elements IDs as returned by {Element::Capabilities::Auditable#scope_audit_id}.
    #
    # @return [Bool]
    #   `false` (also passed to `block`) if a scan is already initializing or
    #   running, `true` once the scan has been configured and scheduled.
    #
    # @raise [ArgumentError]
    #   On a Grid scan without a positive `spawns`, on `restrict_paths` in
    #   High-Performance mode, or when no `url` has been configured.
    #
    def scan( opts = {}, &block )
        # If the instance isn't clean bail out now.
        if @scan_initializing || @framework.busy?
            block.call false
            return false
        end

        # Normalize this sucker to have symbols as keys.
        opts = hash_keys_to_sym( opts, false )

        slaves      = opts[:slaves] || []
        spawn_count = opts[:spawns].to_i

        if opts[:grid] && spawn_count <= 0
            fail ArgumentError,
                 'Option \'spawns\' must be greater than 0 for Grid scans.'
        end

        if (opts[:grid] || spawn_count > 0) && [opts[:restrict_paths]].flatten.compact.any?
            fail ArgumentError,
                 'Option \'restrict_paths\' is not supported when in High-Performance mode.'
        end

        # There may be follow-up/retry calls by the client in cases of network
        # errors (after the request has reached us) so we need to keep minimal
        # track of state in order to bail out on subsequent calls.
        @scan_initializing = true

        begin
            # Plugins option needs to be a hash...
            if opts[:plugins].is_a?( Array )
                opts[:plugins] =
                    opts[:plugins].each_with_object( {} ) { |name, h| h[name] = {} }
            end

            @active_options.set( opts )

            if @framework.opts.url.to_s.empty?
                fail ArgumentError, 'Option \'url\' is mandatory.'
            end

            # Split the limits between this Instance and the slaves to be spawned.
            if spawn_count > 0
                %w(link_count_limit http_req_limit).each do |name|
                    next if !(v = @active_options.send( name ))
                    @active_options.send( "#{name}=", v / (spawn_count + 1) )
                end
            end

            @framework.update_page_queue( opts[:pages] || [] )
            @framework.restrict_to_elements( opts[:elements] || [] )

            opts[:modules] ||= opts[:mods]
            @framework.modules.load opts[:modules] if opts[:modules]
            @framework.plugins.load opts[:plugins] if opts[:plugins]

            # If the Dispatchers are in a Grid config but the user has not requested
            # a Grid scan force the framework to ignore the Grid and work with
            # the instances we give it.
            @framework.ignore_grid if has_dispatcher? && !opts[:grid]

            # Starts the scan after all necessary options have been set.
            after = proc { block.call @framework.run; @scan_initializing = false }

            if opts[:grid]
                #
                # If a Grid scan has been selected then just set us as the master
                # and set the spawn count as max slaves.
                #
                # The Framework and the Grid will sort out the rest...
                #
                @framework.set_as_master
                @framework.opts.max_slaves = spawn_count

                # Rock n' roll!
                after.call
            else
                # Handles each spawn, enslaving it for a high-performance
                # distributed scan.
                enslave = proc { |slave, iter| @framework.enslave( slave ){ iter.next } }

                spawn( spawn_count ) do |spawns|
                    # Add our spawns to the slaves list which was passed as an option.
                    slaves |= spawns

                    # Process the Instances.
                    ::EM::Iterator.new( slaves, slaves.empty? ? 1 : slaves.size ).
                        each( enslave, after )
                end
            end
        rescue
            # Configuration failed before the scan could be handed off; clear
            # the flag so that #busy? doesn't report `true` forever and a
            # corrected #scan call isn't refused.
            @scan_initializing = false
            raise
        end

        true
    end

    # Makes the server go bye-bye...Lights out!
    #
    # @return   [true]
    def shutdown
        print_status 'Shutting down...'

        t = []
        @framework.instance_eval do
            @instances.each do |instance|
                # Don't know why but this works better than EM's stuff
                t << Thread.new { connect_to_instance( instance ).service.shutdown! }
            end
        end

        # NOTE(review): `t` is an Array, so this is Array#join (string
        # concatenation) and does NOT wait for the threads above. Left as-is
        # on purpose: turning it into `t.each( &:join )` would block the caller
        # until every slave answers -- confirm that is safe before changing it.
        t.join

        @server.shutdown
        true
    end
    alias :shutdown! :shutdown

    # @param (see Arachni::RPC::Server::Framework#auditstore)
    # @return (see Arachni::RPC::Server::Framework#auditstore)
    #
    # @deprecated
    def output( &block )
        @framework.output( &block )
    end

    # Logs `str` as an error, used to test error reporting.
    #
    # @private
    def error_test( str )
        print_error str.to_s
    end

    private

    #
    # Normalizes the `:with`/`:without` option of {#progress} into a Hash with
    # Symbol keys.
    #
    # The option may be a String/Symbol, a Hash, or an Array of those; plain
    # names are mapped to `nil` and hashes are merged in (with their keys
    # symbolized). Anything else is ignored.
    #
    # @param    [Hash]  options Options as passed to {#progress} (left untouched).
    # @param    [Symbol]  key   Option to parse, looked up as Symbol then String.
    #
    # @return   [Hash]
    #
    def parse_progress_opts( options, key )
        requested = options[key] || options[key.to_s]

        # `Array()` would break a Hash up into pairs, so wrap scalars manually.
        entries = requested.is_a?( Array ) ? requested.flatten : [requested]

        entries.each_with_object( {} ) do |entry, parsed|
            case entry
                when String, Symbol
                    parsed[entry.to_sym] = nil

                when Hash
                    parsed.merge!( hash_keys_to_sym( entry ) )
            end
        end
    end

    #
    # Provides `num` Instances.
    #
    # If this Instance has a Dispatcher, all Instances will be requested
    # from it. Otherwise, new Instance processes will be directly spawned
    # and immediately detached.
    #
    # @param    [Integer]   num Amount of Instances to return.
    #
    # @return   [Array]  Instance info (urls and tokens), passed to `block`.
    #
    def spawn( num, &block )
        if num <= 0
            block.call []
            return
        end

        q = ::EM::Queue.new

        if has_dispatcher?
            num.times do
                dispatcher.dispatch( @framework.self_url ) do |instance|
                    q << instance
                end
            end
        else
            num.times do
                port  = available_port
                token = generate_token

                Process.detach ::EM.fork_reactor {
                    # make sure we start with a clean env (namespace, opts, etc)
                    Framework.reset

                    Options.rpc_port = port
                    Server::Instance.new( Options.instance, token )
                }

                instance_info = {
                    'url'   => "#{Options.rpc_address}:#{port}",
                    'token' => token
                }

                wait_till_alive( instance_info ) { q << instance_info }
            end
        end

        # Collect the Instances as they become available and hand them over
        # once all of them are in.
        spawns = []
        num.times do
            q.pop do |r|
                spawns << r
                block.call( spawns ) if spawns.size == num
            end
        end
    end

    #
    # Calls `block` once the Instance described by `instance_info` responds
    # to `alive?`.
    #
    # @param    [Hash]  instance_info   Instance info with `'url'` and `'token'`.
    #
    def wait_till_alive( instance_info, &block )
        opts = ::OpenStruct.new

        # if after 100 retries we still haven't managed to get through give up
        opts.max_retries = 100

        # These must be separate statements; joining them with trailing commas
        # makes Ruby assign an Array of all three values to `ssl_ca`.
        opts.ssl_ca   = @opts.ssl_ca
        opts.ssl_pkey = @opts.node_ssl_pkey || @opts.ssl_pkey
        opts.ssl_cert = @opts.node_ssl_cert || @opts.ssl_cert

        Client::Instance.new(
            opts, instance_info['url'], instance_info['token']
        ).service.alive? do |alive|
            if alive.rpc_exception?
                raise alive
            else
                block.call alive
            end
        end
    end

    # Starts the HTTPS server and the RPC service.
    def run
        print_status 'Starting the server...'
        @server.run
    end

    # @return   [Client::Dispatcher]
    #   Memoized client for the Dispatcher at `datastore[:dispatcher_url]`.
    def dispatcher
        @dispatcher ||=
            Client::Dispatcher.new( @opts, @opts.datastore[:dispatcher_url] )
    end

    # @return   [Bool]  `true` if a Dispatcher URL has been configured.
    def has_dispatcher?
        !!@opts.datastore[:dispatcher_url]
    end

    #
    # Outputs the Arachni banner.
    #
    # Displays version number, revision number, author details etc.
    #
    def banner
        puts BANNER
        puts
        puts
    end

    # Prepares all the RPC handlers.
    def set_handlers
        @server.add_async_check do |method|
            # methods that expect a block are async
            method.parameters.flatten.include?( :block )
        end

        @framework      = Server::Framework.new( Options.instance )
        @active_options = Server::ActiveOptions.new( @framework )

        @server.add_handler( 'service',   self )
        @server.add_handler( 'framework', @framework )
        @server.add_handler( "opts",      @active_options )
        @server.add_handler( 'spider',    @framework.spider )
        @server.add_handler( 'modules',   @framework.modules )
        @server.add_handler( 'plugins',   @framework.plugins )
    end

end

end
end
end