#!/usr/bin/env ruby # frozen_string_literal: true require_relative "helpers" def ten_minute_mail path = "disposable/10minutemail.txt" url = "https://10minutemail.com/session/address" 20.times do refresh_list(url: url, path: path) do |response| _account, host = response.data.fetch("address").split("@") [host] end sleep random_timeout end end def temp_mail path = "disposable/tempmail.txt" url = "https://api4.temp-mail.org/request/domains/format/json" refresh_list(url: url, path: path) do |response| response.data.map {|domain| domain.tr("@", "") } end end def temp_mail_address path = "disposable/tempmailaddress.txt" url = "https://www.tempmailaddress.com/index/index" refresh_list(url: url, path: path) do |response| data = JSON.parse( response.body.gsub(/[^-,:\w@.{}"]/, ""), symbolize_names: true ) [data[:email].split("@").last] end end def tempmail_io path = "disposable/tempmail_io.txt" url = "https://api.internal.temp-mail.io/api/v2/domains" refresh_list(url: url, path: path) do |response| response.data["domains"] end end def gmailnator emails = [] 5.times do url = "https://gmailnator.com/bulk-emails" default_headers = {"user-agent" => USER_AGENT.sample} response = Aitch.get(url: url, headers: default_headers) unless response.ok? throw "Received #{response.status} when getting CSRF token" end cookie_header = response.headers["set-cookie"] attr = response.data.css("#csrf_token").first csrf_token = attr[:value] csrf_field = attr[:name] response = Aitch.post( url: url, params: {email_list: "1000", email: [3], csrf_field => csrf_token}, headers: default_headers.merge({"cookie" => cookie_header}) ) throw "Received #{response.status} when fetching list" unless response.ok? emails += response.data.css("#email-list-message a").map do |node| mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@") mailbox = mailbox.gsub(/\./m, "") "#{mailbox}@#{domain}" end sleep random_timeout end append_to_file("disposable/gmailnator.txt", emails) end def domain_scraping(name, url, selector) Timeout.timeout(10) do puts "=> Scraping #{url}" selector, value_selector = selector.split("::") path = "disposable/#{name}.txt" host_regex = /@?(.*?(\.[^.]+)+)/ refresh_list(url: url, path: path) do |response| new_domains = response .data .css(selector) .map {|element| process_scraping(element, value_selector) } new_domains = new_domains .map(&:squish) .reject(&:empty?) .map {|domain| domain[host_regex, 1]&.squish&.tr("@", "") } .reject(&:nil?) .reject(&:empty?) .map {|domain| domain.gsub(/\s*\((.*?)\)/, "") } raise "No #{name} hosts found" if new_domains.empty? new_domains end end rescue StandardError => error puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}" [] end def process_scraping(element, value_selector) value = nil case value_selector when "text()" value = element.text when /^attr\((.*?)\)/ value = element[Regexp.last_match(1)] else element.attributes.each do |_name, attr| attr = attr.value.to_s value = attr if attr =~ host_regex end end unless value raise "no value found: #{element} (value_selector: #{value_selector})" end value end threads = [] threads << thread { load_github_url("https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json") } threads << thread { load_github_url("https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json") } threads << thread { load_github_url("https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt") } threads << thread { load_github_url("https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt") } threads << thread { load_github_url("https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt") } threads << thread { load_github_url("https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt") } threads << thread { load_github_url("https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt") } threads << thread { load_github_url("https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt") } threads << thread { ten_minute_mail } threads << thread { temp_mail } threads << thread { temp_mail_address } threads << thread { tempmail_io } threads << thread { load_file("disposable/disposable_manually_added.txt") } threads << thread { domain_scraping("guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)") } threads << thread { domain_scraping("moakt", "https://www.moakt.com", "select option::attr(value)") } threads << thread { domain_scraping("tempr", "https://tempr.email/", "select[name=DomainId] option::text()") } threads << thread { domain_scraping("yepmail", "https://yepmail.co/", "select[name=domain] option::text()") } threads << thread { domain_scraping("fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)") } threads << thread { domain_scraping("tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)") } threads << thread { domain_scraping("clipmails", "https://clipmails.com/", ".domain-selector::text()") } threads << thread { domain_scraping("1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)") } threads << thread { domain_scraping("emailfake", "https://generator.email", ".tt-suggestion p::text()") } threads << thread { domain_scraping("emailfake", "https://emailfake.com/", ".tt-suggestion p::text()") } threads << thread { domain_scraping("emailfake", "https://email-fake.com/", ".tt-suggestion p::text()") } threads << thread { domain_scraping("receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()") } threads << thread { domain_scraping("itemp", "https://itemp.email", "select[name=domain] option::text()") } threads << thread { domain_scraping("cs", "https://www.cs.email", "select[id=gm-host-select] option::text()") } threads << thread { domain_scraping("tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()") } threads << thread { domain_scraping("tempemail", "https://tempemail.co", "select[name=email_domain] option::text()") } threads << thread { domain_scraping("tmail", "https://mytemp-email.com/", "a.domain-selector::text()") } threads.each_slice(5) do |slice| slice.each(&:join) end threads.clear domains = [] puts "=> Loading disposable_domains.txt" domains += normalize_list(File.read("#{__dir__}/../data/disposable_domains.txt").lines) puts "=> Loading manual/disposable_domains.txt" domains += normalize_list(File.read("#{__dir__}/../data/manual/disposable_domains.txt").lines) puts "=> Loading disposable/*.txt" Dir["./data/disposable/**/*.txt"].map do |file| file = File.expand_path(file) domains += normalize_list(File.read(file).lines) end ignore_domains = normalize_list(File.read("#{__dir__}/../data/free_email_domains.txt").lines) .map {|domain| RootDomain.call(domain) } ignore_domains += %w[ appleid.com ] puts "=> Normalize domains (count: #{domains.size})" domains = domains .map {|domain| RootDomain.call(domain.split("@").last.downcase) } .compact .uniq .select {|domain| EmailData.tlds.include?(domain.split(".").last) } puts "=> Saving domains (count: #{domains.size})" save_file("disposable_domains.txt", domains - ignore_domains) emails = gmailnator emails += normalize_list(File.read("#{__dir__}/../data/manual/disposable_emails.txt").lines) puts "=> Saving email proxies (count: #{emails.size})" save_file("disposable_emails.txt", emails)