lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.0 vs lib/ronin/web/cli/commands/spider.rb in ronin-web-1.0.1
- old
+ new
@@ -1,5 +1,6 @@
+# frozen_string_literal: true
#
# ronin-web - A collection of useful web helper methods and commands.
#
# Copyright (c) 2006-2023 Hal Brodigan (postmodern.mod3 at gmail.com)
#
@@ -117,22 +118,22 @@
default: Spidr.read_timeout
},
desc: 'Sets the read timeout'
option :ssl_timeout, value: {
- type: Integer,
- usage: 'SECS',
- default: Spidr.ssl_timeout
- },
- desc: 'Sets the SSL connection timeout'
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.ssl_timeout
+ },
+ desc: 'Sets the SSL connection timeout'
option :continue_timeout, value: {
- type: Integer,
- usage: 'SECS',
- default: Spidr.continue_timeout
- },
- desc: 'Sets the continue timeout'
+ type: Integer,
+ usage: 'SECS',
+ default: Spidr.continue_timeout
+ },
+ desc: 'Sets the continue timeout'
option :keep_alive_timeout, value: {
type: Integer,
usage: 'SECS',
default: Spidr.keep_alive_timeout
@@ -166,74 +167,72 @@
@host_headers[name] = value
end
option :user_agent, value: {
- type: String,
+ type: String,
usage: 'USER-AGENT'
},
desc: 'Sets the User-Agent string'
option :user_agent_string, short: '-U',
value: {
- type: String,
+ type: String,
usage: 'STRING'
},
desc: 'The User-Agent string to use' do |ua|
@user_agent = ua
end
option :user_agent, short: '-u',
value: {
- type: Hash[
- Support::Network::HTTP::UserAgents::ALIASES.keys.map { |key|
- [key.to_s.tr('_','-'), key]
- }
- ]
+ type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key|
+ key.to_s.tr('_','-')
+ }
},
desc: 'The User-Agent to use' do |name|
@user_agent = name
end
option :referer, short: '-R',
value: {
- type: String,
+ type: String,
usage: 'URL'
},
desc: 'Sets the Referer URL'
option :delay, short: '-d',
value: {
- type: Numeric,
+ type: Numeric,
usage: 'SECS'
},
desc: 'Sets the delay in seconds between each request'
option :limit, short: '-l',
value: {
- type: Integer,
+ type: Integer,
usage: 'COUNT'
},
desc: 'Only spiders up to COUNT pages'
option :max_depth, short: '-d',
value: {
- type: Integer,
- usage: 'DEPTH',
+ type: Integer,
+ usage: 'DEPTH'
},
desc: 'Only spiders up to max depth'
option :enqueue, value: {
- type: String,
+ type: String,
usage: 'URL'
},
desc: 'Adds the URL to the queue' do |url|
@queue << url
end
option :visited, value: {
- type: String,
+ type: String,
usage: 'URL'
},
desc: 'Marks the URL as previously visited' do |url|
@history << url
end
@@ -241,11 +240,11 @@
option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL'
option :strip_query, desc: 'Enables/disables stripping the query component of every URL'
option :visit_host, value: {
- type: String,
+ type: String,
usage: 'HOST'
},
desc: 'Visit URLs with the matching host name' do |host|
@visit_hosts << host
end
@@ -265,19 +264,19 @@
desc: 'Ignore the host name' do |host|
@ignore_hosts << host
end
option :ignore_hosts_like, value: {
- type: Regexp,
- usage: '/REGEX/'
- },
- desc: 'Ignore the host names matching the REGEX' do |regex|
- @ignore_hosts << regex
- end
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore the host names matching the REGEX' do |regex|
+ @ignore_hosts << regex
+ end
option :visit_port, value: {
- type: Integer,
+ type: Integer,
usage: 'PORT'
},
desc: 'Visit URLs with the matching port number' do |port|
@visit_ports << port
end
@@ -297,19 +296,19 @@
desc: 'Ignore the port number' do |port|
@ignore_ports << port
end
option :ignore_ports_like, value: {
- type: Regexp,
- usage: '/REGEX/'
- },
- desc: 'Ignore the port numbers matching the REGEXP' do |regex|
- @ignore_ports << regex
- end
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore the port numbers matching the REGEXP' do |regex|
+ @ignore_ports << regex
+ end
option :visit_link, value: {
- type: String,
+ type: String,
usage: 'URL'
},
desc: 'Visit the URL' do |link|
@visit_links << link
end
@@ -329,40 +328,40 @@
desc: 'Ignore the URL' do |link|
@ignore_links << link
end
option :ignore_links_like, value: {
- type: Regexp,
- usage: '/REGEX/'
- },
- desc: 'Ignore URLs matching the REGEX' do |regex|
- @ignore_links << regex
- end
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Ignore URLs matching the REGEX' do |regex|
+ @ignore_links << regex
+ end
option :visit_ext, value: {
- type: String,
- usage: 'FILE_EXT'
- },
- desc: 'Visit URLs with the matching file ext' do |ext|
- @visit_exts << ext
- end
+ type: String,
+ usage: 'FILE_EXT'
+ },
+ desc: 'Visit URLs with the matching file ext' do |ext|
+ @visit_exts << ext
+ end
option :visit_exts_like, value: {
- type: Regexp,
- usage: '/REGEX/'
- },
- desc: 'Visit URLs with file exts that match the REGEX' do |regex|
- @visit_exts << regex
- end
+ type: Regexp,
+ usage: '/REGEX/'
+ },
+ desc: 'Visit URLs with file exts that match the REGEX' do |regex|
+ @visit_exts << regex
+ end
option :ignore_ext, value: {
- type: String,
- usage: 'FILE_EXT'
- },
- desc: 'Ignore the URLs with the file ext' do |ext|
- @ignore_exts << ext
- end
+ type: String,
+ usage: 'FILE_EXT'
+ },
+ desc: 'Ignore the URLs with the file ext' do |ext|
+ @ignore_exts << ext
+ end
option :ignore_exts_like, value: {
type: Regexp,
usage: '/REGEX/'
},
@@ -372,51 +371,51 @@
option :robots, short: '-r',
desc: 'Specifies whether to honor robots.txt'
option :host, value: {
- type: String,
+ type: String,
usage: 'HOST'
},
desc: 'Spiders the specific HOST'
option :domain, value: {
- type: String,
- usage: 'DOMAIN',
+ type: String,
+ usage: 'DOMAIN'
},
desc: 'Spiders the whole domain'
option :site, value: {
- type: String,
+ type: String,
usage: 'URL'
},
desc: 'Spiders the website, starting at the URL'
option :print_stauts, desc: 'Print the status codes for each URL'
option :print_headers, desc: 'Print response headers for each URL'
option :print_header, value: {
- type: String,
+ type: String,
usage: 'NAME'
},
desc: 'Prints a specific header'
option :history, value: {
- type: String,
+ type: String,
usage: 'FILE'
},
desc: 'The history file'
option :archive, value: {
- type: String,
+ type: String,
usage: 'DIR'
},
desc: 'Archive every visited page to the DIR'
option :git_archive, value: {
- type: String,
+ type: String,
usage: 'DIR'
},
desc: 'Archive every visited page to the git repository'
option :xpath, short: '-X',
@@ -586,10 +585,12 @@
archive.write(page.url,page.body)
end
end
end
+ # post-spidering tasks
+
if options[:git_archive]
archive.commit "Updated #{Time.now}"
end
if options[:print_hosts]
@@ -697,9 +698,10 @@
# @return [Hash{Symbol => Object}]
# The keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
#
def agent_kwargs
kwargs = {}
+
kwargs[:proxy] = options[:proxy] if options[:proxy]
unless @default_headers.empty?
kwargs[:default_headers] = @default_headers
end