Sha256: 13d8cd9c9938de4c18191cb42d8b64881b5e5a633dd3709a77c51b50fabdb8d7

Contents?: true

Size: 1.98 KB

Versions: 1

Compression:

Stored size: 1.98 KB

Contents

# frozen_string_literal: true

require "http"
require "json"
require "thread/pool"
require "uri"

module Miteru
  # Gathers suspicious URLs from the urlscan.io and OpenPhish feeds, reduces
  # each one to its base URL and builds a Website object for every base URL
  # using a pool of worker threads.
  class Crawler
    attr_reader :threads
    attr_reader :size
    attr_reader :verbose

    URLSCAN_ENDPOINT = "https://urlscan.io/api/v1"
    OPENPHISH_ENDPOINT = "https://openphish.com"

    # @param size [Integer] value sent as the `size` query parameter to urlscan.io
    # @param verbose [Boolean] when true, report websites without a phishing kit
    # @raise [ArgumentError] if size is greater than 100,000
    def initialize(size: 100, verbose: false)
      # NOTE: the check accepts exactly 100,000 although the message says
      # "less than"; the message is kept as-is for callers matching on it.
      raise ArgumentError, "size must be less than 100,000" if size > 100_000

      @threads = 10
      @size = size
      @verbose = verbose
    end

    # Fetches the urlscan.io search results for "certstream-suspicious".
    #
    # @return [Array<String>] the task URL of every result that has one;
    #   empty when the response carries no "results" key
    def urlscan_feed
      url = "#{URLSCAN_ENDPOINT}/search/?q=certstream-suspicious&size=#{size}"
      res = JSON.parse(get(url))
      # Array() tolerates a missing "results" key; compact drops results
      # without a task URL.
      Array(res["results"]).map { |result| result.dig("task", "url") }.compact
    end

    # Fetches the OpenPhish feed.
    #
    # @return [Array<String>] one entry per line of the feed
    def openphish_feed
      res = get("#{OPENPHISH_ENDPOINT}/feed.txt")
      res.lines.map(&:chomp)
    end

    # Reduces a URL to its base ("scheme://host", plus ":port" when the port
    # is not the scheme's default).
    #
    # @param url [String, nil] the URL to reduce
    # @return [Array<String>] a one-element array holding the base URL, or an
    #   empty array when the URL cannot be parsed or has no scheme or host
    def breakdown(url)
      begin
        uri = URI.parse(url.to_s)
      rescue URI::InvalidURIError
        return []
      end

      # `host` (unlike `hostname`) keeps the brackets around IPv6 literals,
      # which are required when the value is put back into a URL.
      host = uri.host
      return [] if uri.scheme.nil? || host.nil? || host.empty?

      base = "#{uri.scheme}://#{host}"
      # Keep an explicit non-default port so the base points at the same server.
      base += ":#{uri.port}" if uri.port && uri.port != uri.default_port
      [base]
      # TODO: Should add an option for brute-forcing directories
      # segments = uri.path.split("/")
      # if segments.length.zero?
      #   [base]
      # else
      #   (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join('/')}" }
      # end
    end

    # @return [Array<String>] sorted, de-duplicated base URLs from both feeds
    def suspicious_urls
      urls = urlscan_feed + openphish_feed
      urls.flat_map { |url| breakdown(url) }.uniq.sort
    end

    # Builds a Website for every suspicious URL on a thread pool.
    #
    # @return [Array<Website>] the websites, in completion order
    def execute
      pool = Thread.pool(threads)
      websites = []
      # Guards the shared result array, which is appended to from pool threads.
      lock = Mutex.new

      suspicious_urls.each do |url|
        pool.process do
          website = Website.new(url)
          puts "#{website.url}: it doesn't contain a phishing kit." if verbose && !website.has_kit?
          lock.synchronize { websites << website }
        end
      end
      pool.shutdown

      websites
    end

    # Convenience wrapper: builds a crawler and runs it.
    #
    # @param size [Integer] see #initialize
    # @param verbose [Boolean] see #initialize
    # @return [Array<Website>]
    def self.execute(size: 100, verbose: false)
      new(size: size, verbose: verbose).execute
    end

    private

    # Performs an HTTP GET and returns the body as a String.
    #
    # @param url [String]
    # @return [String] the response body
    # @raise [HTTPResponseError] when the response code is not 200
    def get(url)
      res = HTTP.get(url)
      raise HTTPResponseError unless res.code == 200

      res.body.to_s
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
miteru-0.5.0 lib/miteru/crawler.rb