Diff of lib/ronin/web/cli/commands/spider.rb between ronin-web 1.0.0 and ronin-web 1.0.1

Legend: lines prefixed with `-` are from the old version (1.0.0); lines prefixed with `+` are from the new version (1.0.1).

@@ -1,5 +1,6 @@ +# frozen_string_literal: true # # ronin-web - A collection of useful web helper methods and commands. # # Copyright (c) 2006-2023 Hal Brodigan (postmodern.mod3 at gmail.com) # @@ -117,22 +118,22 @@ default: Spidr.read_timeout }, desc: 'Sets the read timeout' option :ssl_timeout, value: { - type: Integer, - usage: 'SECS', - default: Spidr.ssl_timeout - }, - desc: 'Sets the SSL connection timeout' + type: Integer, + usage: 'SECS', + default: Spidr.ssl_timeout + }, + desc: 'Sets the SSL connection timeout' option :continue_timeout, value: { - type: Integer, - usage: 'SECS', - default: Spidr.continue_timeout - }, - desc: 'Sets the continue timeout' + type: Integer, + usage: 'SECS', + default: Spidr.continue_timeout + }, + desc: 'Sets the continue timeout' option :keep_alive_timeout, value: { type: Integer, usage: 'SECS', default: Spidr.keep_alive_timeout @@ -166,74 +167,72 @@ @host_headers[name] = value end option :user_agent, value: { - type: String, + type: String, usage: 'USER-AGENT' }, desc: 'Sets the User-Agent string' option :user_agent_string, short: '-U', value: { - type: String, + type: String, usage: 'STRING' }, desc: 'The User-Agent string to use' do |ua| @user_agent = ua end option :user_agent, short: '-u', value: { - type: Hash[ - Support::Network::HTTP::UserAgents::ALIASES.keys.map { |key| - [key.to_s.tr('_','-'), key] - } - ] + type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key| + key.to_s.tr('_','-') + } }, desc: 'The User-Agent to use' do |name| @user_agent = name end option :referer, short: '-R', value: { - type: String, + type: String, usage: 'URL' }, desc: 'Sets the Referer URL' option :delay, short: '-d', value: { - type: Numeric, + type: Numeric, usage: 'SECS' }, desc: 'Sets the delay in seconds between each request' option :limit, short: '-l', value: { - type: Integer, + type: Integer, usage: 'COUNT' }, desc: 'Only spiders up to COUNT pages' option :max_depth, short: '-d', value: { - type: Integer, - usage: 
'DEPTH', + type: Integer, + usage: 'DEPTH' }, desc: 'Only spiders up to max depth' option :enqueue, value: { - type: String, + type: String, usage: 'URL' }, desc: 'Adds the URL to the queue' do |url| @queue << url end option :visited, value: { - type: String, + type: String, usage: 'URL' }, desc: 'Marks the URL as previously visited' do |url| @history << url end @@ -241,11 +240,11 @@ option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL' option :strip_query, desc: 'Enables/disables stripping the query component of every URL' option :visit_host, value: { - type: String, + type: String, usage: 'HOST' }, desc: 'Visit URLs with the matching host name' do |host| @visit_hosts << host end @@ -265,19 +264,19 @@ desc: 'Ignore the host name' do |host| @ignore_hosts << host end option :ignore_hosts_like, value: { - type: Regexp, - usage: '/REGEX/' - }, - desc: 'Ignore the host names matching the REGEX' do |regex| - @ignore_hosts << regex - end + type: Regexp, + usage: '/REGEX/' + }, + desc: 'Ignore the host names matching the REGEX' do |regex| + @ignore_hosts << regex + end option :visit_port, value: { - type: Integer, + type: Integer, usage: 'PORT' }, desc: 'Visit URLs with the matching port number' do |port| @visit_ports << port end @@ -297,19 +296,19 @@ desc: 'Ignore the port number' do |port| @ignore_ports << port end option :ignore_ports_like, value: { - type: Regexp, - usage: '/REGEX/' - }, - desc: 'Ignore the port numbers matching the REGEXP' do |regex| - @ignore_ports << regex - end + type: Regexp, + usage: '/REGEX/' + }, + desc: 'Ignore the port numbers matching the REGEXP' do |regex| + @ignore_ports << regex + end option :visit_link, value: { - type: String, + type: String, usage: 'URL' }, desc: 'Visit the URL' do |link| @visit_links << link end @@ -329,40 +328,40 @@ desc: 'Ignore the URL' do |link| @ignore_links << link end option :ignore_links_like, value: { - type: Regexp, - usage: '/REGEX/' - }, - desc: 'Ignore URLs 
matching the REGEX' do |regex| - @ignore_links << regex - end + type: Regexp, + usage: '/REGEX/' + }, + desc: 'Ignore URLs matching the REGEX' do |regex| + @ignore_links << regex + end option :visit_ext, value: { - type: String, - usage: 'FILE_EXT' - }, - desc: 'Visit URLs with the matching file ext' do |ext| - @visit_exts << ext - end + type: String, + usage: 'FILE_EXT' + }, + desc: 'Visit URLs with the matching file ext' do |ext| + @visit_exts << ext + end option :visit_exts_like, value: { - type: Regexp, - usage: '/REGEX/' - }, - desc: 'Visit URLs with file exts that match the REGEX' do |regex| - @visit_exts << regex - end + type: Regexp, + usage: '/REGEX/' + }, + desc: 'Visit URLs with file exts that match the REGEX' do |regex| + @visit_exts << regex + end option :ignore_ext, value: { - type: String, - usage: 'FILE_EXT' - }, - desc: 'Ignore the URLs with the file ext' do |ext| - @ignore_exts << ext - end + type: String, + usage: 'FILE_EXT' + }, + desc: 'Ignore the URLs with the file ext' do |ext| + @ignore_exts << ext + end option :ignore_exts_like, value: { type: Regexp, usage: '/REGEX/' }, @@ -372,51 +371,51 @@ option :robots, short: '-r', desc: 'Specifies whether to honor robots.txt' option :host, value: { - type: String, + type: String, usage: 'HOST' }, desc: 'Spiders the specific HOST' option :domain, value: { - type: String, - usage: 'DOMAIN', + type: String, + usage: 'DOMAIN' }, desc: 'Spiders the whole domain' option :site, value: { - type: String, + type: String, usage: 'URL' }, desc: 'Spiders the website, starting at the URL' option :print_stauts, desc: 'Print the status codes for each URL' option :print_headers, desc: 'Print response headers for each URL' option :print_header, value: { - type: String, + type: String, usage: 'NAME' }, desc: 'Prints a specific header' option :history, value: { - type: String, + type: String, usage: 'FILE' }, desc: 'The history file' option :archive, value: { - type: String, + type: String, usage: 'DIR' }, desc: 
'Archive every visited page to the DIR' option :git_archive, value: { - type: String, + type: String, usage: 'DIR' }, desc: 'Archive every visited page to the git repository' option :xpath, short: '-X', @@ -586,10 +585,12 @@ archive.write(page.url,page.body) end end end + # post-spidering tasks + if options[:git_archive] archive.commit "Updated #{Time.now}" end if options[:print_hosts] @@ -697,9 +698,10 @@ # @return [Hash{Symbol => Object}] # The keyword arguments for `Ronin::Web::Spider::Agent#initialize`. # def agent_kwargs kwargs = {} + kwargs[:proxy] = options[:proxy] if options[:proxy] unless @default_headers.empty? kwargs[:default_headers] = @default_headers end