lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.0.beta3 vs lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.0.beta4
- old
+ new
@@ -81,10 +81,11 @@
# --domain DOMAIN Spiders the whole domain
# --site URL Spiders the website, starting at the URL
# --print-status Print the status codes for each URL
# --print-headers Print response headers for each URL
# --print-header NAME Prints a specific header
+ # --history FILE The history file
# --archive DIR Archive every visited page to the DIR
# --git-archive DIR Archive every visited page to the git repository
# -X, --xpath XPATH Evaluates the XPath on each HTML page
# -C, --css-path XPATH Evaluates the CSS-path on each HTML page
# -h, --help Print help information
@@ -388,20 +389,26 @@
type: String,
usage: 'URL'
},
desc: 'Spiders the website, starting at the URL'
- option :print_verbose, desc: 'Print the status codes for each URL'
+ option :print_stauts, desc: 'Print the status codes for each URL'
option :print_headers, desc: 'Print response headers for each URL'
option :print_header, value: {
type: String,
usage: 'NAME'
},
desc: 'Prints a specific header'
+ option :history, value: {
+ type: String,
+ usage: 'FILE'
+ },
+ desc: 'The history file'
+
option :archive, value: {
type: String,
usage: 'DIR'
},
desc: 'Archive every visited page to the DIR'
@@ -550,55 +557,32 @@
Web::Spider::Archive.open(options[:archive])
elsif options[:git_archive]
Web::Spider::GitArchive.open(options[:git_archive])
end
+ history_file = if options[:history]
+ File.open(options[:history],'w')
+ end
+
agent = new_agent do |agent|
agent.every_page do |page|
print_page(page)
end
agent.every_failed_url do |url|
print_verbose "failed to request #{url}"
end
- if options[:print_hosts]
- agent.every_host do |host|
- print_verbose "spidering new host #{host}"
- end
- end
+ define_printing_callbacks(agent)
- if options[:print_certs]
- agent.every_cert do |cert|
- print_verbose "encountered new certificate for #{cert.subject.common_name}"
+ if history_file
+ agent.every_page do |page|
+ history_file.puts(page.url)
+ history_file.flush
end
end
- if options[:print_js_strings]
- agent.every_js_string do |string|
- print_content string
- end
- end
-
- if options[:print_html_comments]
- agent.every_html_comment do |comment|
- print_content comment
- end
- end
-
- if options[:print_js_comments]
- agent.every_js_comment do |comment|
- print_content comment
- end
- end
-
- if options[:print_comments]
- agent.every_comment do |comment|
- print_content comment
- end
- end
-
if archive
agent.every_ok_page do |page|
archive.write(page.url,page.body)
end
end
@@ -628,13 +612,61 @@
agent.collected_certs.each do |cert|
puts cert
puts
end
end
+ ensure
+ # Guard on the handle, not the option: history_file is still nil when an
+ # error is raised before (or by) File.open, and calling #close on nil here
+ # would replace the original exception with a NoMethodError.
+ history_file.close if history_file
end
#
+ # Defines callbacks that print information.
+ #
+ # @param [Ronin::Web::Spider::Agent] agent
+ # The newly created agent.
+ #
+ def define_printing_callbacks(agent)
+ if options[:print_hosts]
+ agent.every_host do |host|
+ print_verbose "spidering new host #{host}"
+ end
+ end
+
+ if options[:print_certs]
+ agent.every_cert do |cert|
+ print_verbose "encountered new certificate for #{cert.subject.common_name}"
+ end
+ end
+
+ if options[:print_js_strings]
+ agent.every_js_string do |string|
+ print_content string
+ end
+ end
+
+ if options[:print_html_comments]
+ agent.every_html_comment do |comment|
+ print_content comment
+ end
+ end
+
+ if options[:print_js_comments]
+ agent.every_js_comment do |comment|
+ print_content comment
+ end
+ end
+
+ if options[:print_comments]
+ agent.every_comment do |comment|
+ print_content comment
+ end
+ end
+ end
+
+ #
# Creates a new web spider agent.
#
# @yield [agent]
# The given block will be given the newly created and configured
# web spider agent.
@@ -715,11 +747,11 @@
# Prints the status of a page.
#
# @param [Spidr::Page] page
# A spidered page.
#
- def print_verbose(page)
+ def print_status(page)
if page.code < 300
print "#{colors.bright_green(page.code)} "
elsif page.code < 400
print "#{colors.bright_yellow(page.code)} "
elsif page.code < 500
@@ -752,10 +784,10 @@
#
# @param [Spidr::Page] page
# A spidered page.
#
def print_page(page)
- print_verbose(page) if options[:print_verbose]
+ print_status(page) if options[:print_status]
print_url(page)
if options[:print_headers]
print_headers(page)
elsif options[:print_header]