lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.0.beta3 vs lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.0.beta4

- old
+ new

@@ -81,10 +81,11 @@ # --domain DOMAIN Spiders the whole domain # --site URL Spiders the website, starting at the URL # --print-status Print the status codes for each URL # --print-headers Print response headers for each URL # --print-header NAME Prints a specific header + # --history FILE The history file # --archive DIR Archive every visited page to the DIR # --git-archive DIR Archive every visited page to the git repository # -X, --xpath XPATH Evaluates the XPath on each HTML page # -C, --css-path XPATH Evaluates the CSS-path on each HTML page # -h, --help Print help information @@ -388,20 +389,26 @@ type: String, usage: 'URL' }, desc: 'Spiders the website, starting at the URL' - option :print_verbose, desc: 'Print the status codes for each URL' + option :print_status, desc: 'Print the status codes for each URL' option :print_headers, desc: 'Print response headers for each URL' option :print_header, value: { type: String, usage: 'NAME' }, desc: 'Prints a specific header' + option :history, value: { + type: String, + usage: 'FILE' + }, + desc: 'The history file' + option :archive, value: { type: String, usage: 'DIR' }, desc: 'Archive every visited page to the DIR' @@ -550,55 +557,32 @@ Web::Spider::Archive.open(options[:archive]) elsif options[:git_archive] Web::Spider::GitArchive.open(options[:git_archive]) end + history_file = if options[:history] + File.open(options[:history],'w') + end + agent = new_agent do |agent| agent.every_page do |page| print_page(page) end agent.every_failed_url do |url| print_verbose "failed to request #{url}" end - if options[:print_hosts] - agent.every_host do |host| - print_verbose "spidering new host #{host}" - end - end + define_printing_callbacks(agent) - if options[:print_certs] - agent.every_cert do |cert| - print_verbose "encountered new certificate for #{cert.subject.common_name}" + if history_file + agent.every_page do |page| + history_file.puts(page.url) + history_file.flush end end - if options[:print_js_strings] - 
agent.every_js_string do |string| - print_content string - end - end - - if options[:print_html_comments] - agent.every_html_comment do |comment| - print_content comment - end - end - - if options[:print_js_comments] - agent.every_js_comment do |comment| - print_content comment - end - end - - if options[:print_comments] - agent.every_comment do |comment| - print_content comment - end - end - if archive agent.every_ok_page do |page| archive.write(page.url,page.body) end end @@ -628,13 +612,61 @@ agent.collected_certs.each do |cert| puts cert puts end end + ensure + if options[:history] + history_file.close + end end # + # Defines callbacks that print information. + # + # @param [Ronin::Web::Spider::Agent] agent + # The newly created agent. + # + def define_printing_callbacks(agent) + if options[:print_hosts] + agent.every_host do |host| + print_verbose "spidering new host #{host}" + end + end + + if options[:print_certs] + agent.every_cert do |cert| + print_verbose "encountered new certificate for #{cert.subject.common_name}" + end + end + + if options[:print_js_strings] + agent.every_js_string do |string| + print_content string + end + end + + if options[:print_html_comments] + agent.every_html_comment do |comment| + print_content comment + end + end + + if options[:print_js_comments] + agent.every_js_comment do |comment| + print_content comment + end + end + + if options[:print_comments] + agent.every_comment do |comment| + print_content comment + end + end + end + + # # Creates a new web spider agent. # # @yield [agent] # The given block will be given the newly created and configured # web spider agent. @@ -715,11 +747,11 @@ # Prints the status of a page. # # @param [Spidr::Page] page # A spidered page. 
# - def print_verbose(page) + def print_status(page) if page.code < 300 print "#{colors.bright_green(page.code)} " elsif page.code < 400 print "#{colors.bright_yellow(page.code)} " elsif page.code < 500 @@ -752,10 +784,10 @@ # # @param [Spidr::Page] page # A spidered page. # def print_page(page) - print_verbose(page) if options[:print_verbose] + print_status(page) if options[:print_status] print_url(page) if options[:print_headers] print_headers(page) elsif options[:print_header]