Sha256: 2b3567779f74c3af2f6d4f8cc472953ea0039d6f0497f8542c2950c18f3f0f9a

Contents?: true

Size: 1.54 KB

Versions: 16

Compression:

Stored size: 1.54 KB

Contents

# frozen_string_literal: true

module CMSScanner
  # Page hashing helpers, used to tell whether a response is really
  # the homepage or a (possibly custom) 404 page
  class Target < WebSite
    # Computes a hash for a page after stripping its volatile parts.
    #
    # @note <script> elements and comments are dropped first, as they may
    #   carry cache generation details which differ between requests
    #
    # @param [ Typhoeus::Response, String ] page Either a response, or something
    #   to request via NS::Browser.get (redirections are followed)
    #
    # @return [ String ] The MD5 hex digest of the cleaned page
    def self.page_hash(page)
      response = page.is_a?(Typhoeus::Response) ? page : NS::Browser.get(page, followlocation: true)

      document = Nokogiri::HTML(response.body)
      volatile_nodes = document.xpath('//script|//comment()')
      volatile_nodes.each(&:remove)

      # NOTE(review): the parsed document itself is given to the digest, so what
      # gets hashed depends on its implicit String conversion - presumably the
      # text content rather than the markup; confirm against Nokogiri
      Digest::MD5.hexdigest(document)
    end

    # @return [ String ] The hash of the page located at #url (computed once, then cached)
    def homepage_hash
      @homepage_hash ||= self.class.page_hash(url)
    end

    # @note Used to detect potential custom 404 pages which respond with a 200
    #
    # @return [ String ] The hash of the page returned for #non_existant_page_url
    #   (computed once, then cached)
    def error_404_hash
      @error_404_hash ||= self.class.page_hash(non_existant_page_url)
    end

    # @note The file name is the MD5 hex digest of a random number followed by
    #   '.html', so each call gives a different URL
    #
    # @return [ String ] The URL of a page which is unlikely to exist
    def non_existant_page_url
      base = uri
      file_name = "#{Digest::MD5.hexdigest(rand(999_999_999).to_s)}.html"

      base.join(file_name).to_s
    end

    # @param [ Typhoeus::Response, String ] page
    #
    # @return [ Boolean ] Whether or not the hash of the page matches the one
    #   of the homepage or the one of the 404
    def homepage_or_404?(page)
      # The reference hashes are resolved before the page is hashed
      reference_hashes = homepage_and_404_hashes

      reference_hashes.include?(self.class.page_hash(page))
    end

    protected

    # @return [ Array<String> ] The frozen, cached pair of homepage and 404 hashes
    def homepage_and_404_hashes
      @homepage_and_404_hashes ||= begin
        reference_hashes = [homepage_hash, error_404_hash]
        reference_hashes.freeze
      end
    end
  end
end

Version data entries

16 entries across 16 versions & 1 rubygems

Version Path
cms_scanner-0.6.1 lib/cms_scanner/target/hashes.rb
cms_scanner-0.6.0 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.8 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.7 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.6 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.5 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.4 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.3 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.2 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.1 lib/cms_scanner/target/hashes.rb
cms_scanner-0.5.0 lib/cms_scanner/target/hashes.rb
cms_scanner-0.0.44.3 lib/cms_scanner/target/hashes.rb
cms_scanner-0.0.44.2 lib/cms_scanner/target/hashes.rb
cms_scanner-0.0.44.1 lib/cms_scanner/target/hashes.rb
cms_scanner-0.0.44.0 lib/cms_scanner/target/hashes.rb
cms_scanner-0.0.43.2 lib/cms_scanner/target/hashes.rb