lib/cms_scanner/finders/finder/enumerator.rb in cms_scanner-0.0.41.10 vs lib/cms_scanner/finders/finder/enumerator.rb in cms_scanner-0.0.42.0

- old
+ new

@@ -1,45 +1,74 @@ +# frozen_string_literal: true + module CMSScanner module Finders class Finder # Module to provide an easy way to enumerate items such as plugins, themes etc module Enumerator + # @return [ Hash ] + def head_or_get_request_params + # Disabling the cache, as it causes a 'stack level too deep' exception + # with a large number of requests. + # See https://github.com/typhoeus/typhoeus/issues/408 + @head_or_get_request_params ||= target.head_or_get_params.merge(cache_ttl: 0) + end + + # @return [ Array<Integer> ] + def valid_response_codes + @valid_response_codes ||= [200] + end + # @param [ Hash ] The target urls # @param [ Hash ] opts # @option opts [ Boolean ] :show_progression Whether or not to display the progress bar # @option opts [ Regexp ] :exclude_content + # @option opts [ Boolean, Array, String ] :check_full_response # # @yield [ Typhoeus::Response, String ] - def enumerate(target_urls, opts = {}) - create_progress_bar(opts.merge(total: target_urls.size)) + def enumerate(urls, opts = {}) + create_progress_bar(opts.merge(total: urls.size)) - target_urls.each do |url, id| - request = browser.forge_request(url, request_params) + urls.each do |url, id| + request = browser.forge_request(url, head_or_get_request_params) - request.on_complete do |res| + request.on_complete do |head_res| progress_bar.increment - next if target.homepage_or_404?(res) + next unless valid_response_codes.include?(head_res.code) - if opts[:exclude_content] - next if res.response_headers&.match(opts[:exclude_content]) || res.body.match(opts[:exclude_content]) - end + next if opts[:exclude_content] && head_res.response_headers&.match(opts[:exclude_content]) - yield res, id + head_or_full_res = maybe_get_full_response(head_res, opts) + + yield head_or_full_res, id if head_or_full_res end hydra.queue(request) end hydra.run end + # @param [ Typhoeus::Response ] head_res + # @param [ Hash ] opts + # + # @return [ Typhoeus::Response, nil ] + def 
maybe_get_full_response(head_res, opts) + return head_res unless opts[:check_full_response] == true || + [*opts[:check_full_response]].include?(head_res.code) + + full_res = NS::Browser.get(head_res.effective_url, full_request_params) + + return if target.homepage_or_404?(full_res) || + opts[:exclude_content] && full_res.body&.match(opts[:exclude_content]) + + full_res + end + # @return [ Hash ] - def request_params - # disabling the cache, as it causes a 'stack level too deep' exception - # with a large number of requests :/ - # See https://github.com/typhoeus/typhoeus/issues/408 - { cache_ttl: 0 } + def full_request_params + @full_request_params ||= {} end end end end end