Sha256: 6db657a308f8364ed9c3d5c0b59fa2270c750b333a57986bb65e23ced643748f

Contents?: true

Size: 1.53 KB

Versions: 4

Compression:

Stored size: 1.53 KB

Contents

require 'public_suffix'
require 'yaml'

# Gman checks whether a domain, URL, or email address belongs to a
# government domain, using a whitelist stored in domains.txt alongside
# this file and parsed with the public_suffix gem.
module Gman

  VERSION = '0.0.3'

  class << self

    # Normalizes and checks if a given string represents a government domain
    # Possible strings to test:
    #   ".gov"
    #   "foo.gov"
    #   "foo@bar.gov"
    #   "foo.gov.uk"
    #   "http://foo.bar.gov"
    #
    # text - the String (domain, URL, or email address) to test; may be nil.
    #
    # Returns boolean true if a government domain; false otherwise, including
    # when text is nil or no domain can be extracted from it.
    def valid?(text)
      return false if text.nil?
      domain = get_domain text

      # Nothing usable could be extracted (e.g. "" or "@"); bail out before
      # handing nil to the public suffix lookups below.
      return false if domain.nil?

      # check using public suffix's standard logic
      rule = list.find domain
      return true if !rule.nil? && rule.allow?(domain)

      # also allow for explicit matches to domain list
      # but still make sure it's at least a valid domain
      return false unless PublicSuffix.valid? domain
      list.rules.any? { |candidate| candidate.value == domain }
    end

    # returns an instance of our custom public suffix list
    # list behaves like PublicSuffix::List but is limited to our whitelisted domains
    #
    # The parsed list is memoized in @list so domains.txt is read and parsed
    # only once; the block form of File.open closes the handle after parsing.
    def list
      @list ||= File.open(File.join(File.dirname(__FILE__), "domains.txt"), "r:utf-8") do |file|
        PublicSuffix::List.parse(file)
      end
    end

    # Get the FQDN name from a URL or email address.
    #
    # text - the String to extract the domain from.
    #
    # Returns a string with the FQDN; nil if there's an error or nothing matches.
    # Source: https://github.com/leereilly/swot/blob/master/lib/swot.rb#L190
    def get_domain(text)
      # String#[] with a capture index yields nil when the pattern does not
      # match, so the ordinary no-match path no longer goes through rescue.
      text.strip.downcase[domain_regex, 1]
    rescue StandardError
      # Deliberate best effort: non-string input (no #strip) or a string that
      # cannot be matched is reported as "no domain" rather than raised.
      nil
    end

    private

    # Pattern whose first capture is the trailing run of characters that
    # contains no "@", "/" or ":", optionally followed by colons/digits
    # (such as a port) at the end of the text.
    # Source: https://github.com/leereilly/swot/blob/master/lib/swot.rb#L202
    def domain_regex
      /([^@\/:]+)[:\d]*$/
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
gman-0.0.6 lib/gman.rb
gman-0.0.5 lib/gman.rb
gman-0.0.4 lib/gman.rb
gman-0.0.3 lib/gman.rb