# Utility functions for parsing and manipulating public-suffix domain lists
# Only used in development and not loaded by default
require 'yaml'
require 'open-uri'
require 'resolv'
require 'logger'
require_relative '../gman'
require_relative './domain_list'

class Gman
  class Importer
    attr_accessor :domains

    # Domains that show up in vendored lists but are known false positives.
    # Any domain listed here is rejected with the reason 'blacklist'.
    BLACKLIST = %w[
      business.centurytel.net
      chesnee.net
      citlink.net
      egovlink.com
      emainehosting.com
      fantasyspringsresort.com
      frontiernet.net
      hartford-hwp.com
      homepages.sover.net
      htc.net
      koasekabenaki.org
      kstrom.net
      laworkforce.net
      mississippistateparks.reserveamerica.com
      mylocalgov.com
      myweb.cebridge.net
      ncstars.org
      neagrelations.org
      qis.net
      rootsweb.com
      showcase.netins.net
      valuworld.com
      wctc.net
      webconnections.net
      webpages.charter.net
    ].freeze

    # Pattern-based rejection rules. Each key is the reason reported when a
    # domain is rejected; each value is the pattern that triggers it.
    #
    # The literal patterns are anchored with \A / \z (start / end of the
    # whole string) rather than ^ / $ (start / end of any line), so a match
    # always refers to the real beginning or end of the domain.
    REGEX_CHECKS = {
      'home. regex'     => /\Ahome\./,
      'user. regex'     => /\Ausers?\./,
      'sites. regex'    => /\Asites?\./,
      'weebly'          => /weebly\.com\z/,
      'wordpress'       => /wordpress\.com\z/,
      'govoffice'       => /govoffice\d?\.com\z/,
      'homestead'       => /homestead\.com\z/,
      'wix.com'         => /wix\.com\z/,
      'blogspot.com'    => /blogspot\.com\z/,
      'tripod.com'      => /tripod\.com\z/,
      'squarespace.com' => /squarespace\.com\z/,
      'github.io'       => /github\.io\z/,
      'tumblr'          => /tumblr\.com\z/,
      'locality'        => Gman::Locality::REGEX
    }.freeze

    # Wraps the supplied domains in a DomainList and stores the result so it
    # is available through the `domains` accessor.
    #
    # domains - passed unchanged to DomainList.new
    def initialize(domains)
      self.domains = DomainList.new(domains)
    end

    # Logger writing to STDOUT; created on first use and reused afterwards.
    def logger
      return @logger if @logger
      @logger = Logger.new(STDOUT)
    end

    # Reduces a raw entry to a canonical form: the string form of the
    # corresponding Gman object, lower-cased, with surrounding whitespace,
    # a leading "www." label and one trailing slash removed.
    #
    # domain - raw value handed to Gman.new
    #
    # Returns the normalized String.
    def normalize_domain(domain)
      canonical = Gman.new(domain).to_s
      # Second to_s kept from the original; presumably guards against
      # Gman#to_s yielding nil for unparseable input - confirm against Gman.
      cleaned = canonical.to_s.downcase.strip
      # Escaped dot + whole-string anchors: only a literal "www." prefix is
      # removed. The former /^www./ treated the dot as a wildcard and also
      # stripped "www2" from hosts such as "www2.example.gov".
      cleaned.sub(/\Awww\./, '').sub(%r{/\z}, '')
    end

    # Runs the enabled checks against a domain, stopping at the first one
    # that fails.
    #
    # domain  - domain String to examine
    # options - Hash of switches:
    #           :skip_dupe    - truthy to bypass the duplicate check
    #           :skip_resolve - truthy to bypass the DNS resolution check
    #
    # Returns true when every enabled check returns a truthy value,
    # false otherwise.
    def valid_domain?(domain, options = {})
      passed = ensure_valid(domain) &&
               (options[:skip_dupe] || ensure_not_dupe(domain)) &&
               (options[:skip_resolve] || ensure_resolves(domain))
      passed ? true : false
    end

    # Reports that a domain is being turned away.
    #
    # When the RECONCILING environment variable is set (to any value), the
    # reason is returned and nothing is logged. Otherwise the rejection is
    # logged at info level and false is returned.
    def reject(domain, reason)
      if ENV['RECONCILING']
        reason
      else
        logger.info "👎 `#{domain}`: #{reason}"
        false
      end
    end

    # Result of DomainList.current, fetched on first use and cached for
    # later calls.
    def current
      return @current if @current
      @current = DomainList.current
    end

    # Normalizes and validates the pending domains, then merges the
    # survivors into the current list, logging the counts along the way.
    #
    # options - Hash forwarded to ensure_validity!. Now defaults to {} so the
    #           method can be called without arguments, matching
    #           ensure_validity! and Gman.import.
    #
    # Terminates the process with exit status 0 when no domains are left
    # after validation.
    def import(options = {})
      # Both counts come from the memoized `current`, so the before/after
      # figures describe the same list object.
      logger.info "Current: #{current.count} domains"
      logger.info "Adding: #{domains.count} domains"

      normalize_domains!
      ensure_validity!(options)

      if domains.count.zero?
        logger.info 'Nothing to add. Aborting'
        exit 0
      end

      add_to_current
      logger.info "New: #{current.count} domains"
    end

    # Resolv::DNS client configured with the nameservers 8.8.8.8 and
    # 8.8.4.4; built on first use and reused afterwards.
    def resolver
      return @resolver if @resolver
      @resolver = Resolv::DNS.new(nameserver: %w(8.8.8.8 8.8.4.4))
    end

    # Checks whether a domain can be found in DNS. The host is normalized
    # through Addressable::URI first; it passes when it resolves to an
    # address or, failing that, has an NS record or an MX record.
    #
    # Returns true or the found record when a lookup succeeds, otherwise the
    # (falsy) result of the last lookup.
    def domain_resolves?(domain)
      host = Addressable::URI.new(host: domain).normalize.host
      if ip?(host)
        true
      else
        returns_record?(host, 'NS') || returns_record?(host, 'MX')
      end
    end

    private

    # Tests the domain against the REGEX_CHECKS patterns in order.
    #
    # Returns the result of `reject`, called with the label of the first
    # matching check, or true when no pattern matches.
    def ensure_regex(domain)
      label, _pattern = REGEX_CHECKS.find { |_name, pattern| domain =~ pattern }
      label ? reject(domain, label) : true
    end

    # Applies the static checks to a domain, in order: empty string,
    # BLACKLIST membership, PublicSuffix validity (of the domain prefixed
    # with a dot), Swot's academic check, then the regex checks.
    #
    # Returns false for an empty domain, the result of `reject` for the first
    # failing check, or otherwise whatever ensure_regex returns.
    def ensure_valid(domain)
      return false if domain.empty?
      return reject(domain, 'blacklist') if BLACKLIST.include?(domain)
      return reject(domain, 'invalid') unless PublicSuffix.valid?(".#{domain}")
      return reject(domain, 'academic') if Swot.is_academic?(domain)

      ensure_regex(domain)
    end

    # Returns true when domain_resolves? is truthy for the domain; otherwise
    # the result of rejecting it as 'unresolvable'.
    def ensure_resolves(domain)
      domain_resolves?(domain) ? true : reject(domain, 'unresolvable')
    end

    # Rejects domains that `dupe?` flags against the current list.
    #
    # Returns true when the domain is not a dupe. Otherwise returns the
    # result of `reject`, with the reason 'duplicate' for a domain present in
    # current.domains, or "subdomain of <parent>" using
    # current.parent_domain.
    def ensure_not_dupe(domain)
      return true unless dupe?(domain)

      reason =
        if current.domains.include?(domain)
          'duplicate'
        else
          "subdomain of #{current.parent_domain(domain)}"
        end
      reject(domain, reason)
    end

    # Reports whether the domain is already covered by the current list.
    #
    # Returns the result of current.domains.include? when that is truthy,
    # otherwise whatever current.parent_domain returns for the domain.
    def dupe?(domain)
      listed = current.domains.include?(domain)
      return listed if listed

      current.parent_domain(domain)
    end

    # Normalizes every pending domain in place, group by group, and drops
    # the duplicates that normalization produces within a group.
    def normalize_domains!
      domains.list.each do |_group, entries|
        # map! always returns the array itself, so chaining uniq! is safe.
        entries.map! { |entry| normalize_domain(entry) }.uniq!
      end
    end

    # Filters every group of pending domains in place, keeping only the
    # entries for which valid_domain? is truthy.
    #
    # options - Hash forwarded unchanged to valid_domain?
    def ensure_validity!(options = {})
      domains.list.each do |_group, entries|
        entries.keep_if { |entry| valid_domain?(entry, options) }
      end
    end

    # Appends each group of pending domains to the matching group of the
    # current list (creating the group when it is missing), then calls
    # current.write.
    def add_to_current
      domains.list.each do |group, additions|
        existing = (current.list[group] ||= [])
        existing.concat(additions)
      end
      current.write
    end

    # Asks `resolver` for an address for the given name.
    #
    # Returns whatever getaddress yields on success, or false when the
    # lookup raises Resolv::ResolvError.
    def ip?(domain)
      address = resolver.getaddress(domain)
    rescue Resolv::ResolvError
      false
    else
      address
    end

    # Looks up a single DNS resource record for the domain.
    #
    # domain - name to query
    # type   - name of a class under Resolv::DNS::Resource::IN, e.g. 'NS'
    #          or 'MX'
    #
    # Returns whatever getresource yields on success, or false when the
    # lookup raises Resolv::ResolvError.
    def returns_record?(domain, type)
      record_class = Object.const_get("Resolv::DNS::Resource::IN::#{type}")
      resolver.getresource(domain, record_class)
    rescue Resolv::ResolvError
      false
    end
  end
end

class Gman
  class << self
    # Convenience entry point: wraps the given hash in a Gman::Importer and
    # runs its import with the supplied options.
    #
    # hash    - passed to Gman::Importer.new
    # options - Hash passed to Gman::Importer#import (default: {})
    def import(hash, options = {})
      importer = Gman::Importer.new(hash)
      importer.import(options)
    end
  end
end