Sha256: 3d2b814e5bf58ac44bb3f9508a8e486cbbc94bd54d12bf50959882765569052a
Contents?: true
Size: 1.84 KB
Versions: 5
Compression:
Stored size: 1.84 KB
Contents
#!/usr/bin/env ruby
#
# Reconciles the USA.gov-maintained list of US domains with domains.txt
# to show domains listed in the USA.gov-maintained list that we reject and why
#
# Usage: script/reconcile-us

require './lib/gman/importer'
require 'open-uri'
require 'yaml'

# NOTE(review): presumably read by the gman library to alter its behaviour
# while reconciling -- confirm against lib/gman.
ENV['RECONCILING'] = 'true'

# Group headings from the upstream list that are dropped before importing.
blacklist = ['usagovQUASI']
source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'

# Fetch through open-uri explicitly: Kernel#open no longer opens URLs on
# Ruby 3.0+, and this script did not load open-uri on its own.
data = URI.parse(source).read

# Keep only the text after the last 74-underscore divider, then split it
# into non-empty rows.
data = data.split('_' * 74)
data = data.last.strip
data = data.split(/\r?\n/).reject(&:empty?)

# Rows starting with a word character open a new group; every other row is
# an entry of the most recently opened group.
domains = {}
group = ''
data.each do |row|
  if row =~ /^\w/
    group = row
    domains[group] = []
  else
    # Drop the first literal ".<TAB>" from the entry before trimming it.
    domains[group].push row.sub(".\t", '').strip
  end
end

domains.reject! { |g, _domain| blacklist.include?(g) }
importer = Gman::Importer.new(domains)

importer.logger.info "Starting with #{importer.domains.count} domains"

# Rewrite each group's entries in place: first through Gman#to_s, then
# through the importer's normalize_domain.
importer.domains.list.each do |_group, d|
  d.map! { |domain| Gman.new(domain).to_s }
  d.map! { |domain| importer.normalize_domain(domain) }
end

count = importer.domains.domains.count
importer.logger.info "Filtered down to #{count} normalized domains"

# For every group present in both lists, collect the entries of the current
# list that do not appear in the USA.gov list.
missing = {}
importer.domains.list.each do |g, usagovdomains|
  next unless importer.current.list[g]

  missing[g] = importer.current.list[g] - usagovdomains
end

missing.reject! { |_key, value| value.empty? }

# Count domains rather than groups, matching the rejected-domain count below.
count = missing.values.flatten.count
importer.logger.info "Found #{count} domains not on the USA.gov list"
puts "Here's the list of missing domains:"
puts YAML.dump(missing)

# Bucket every USA.gov domain by the result of valid_domain?, then drop the
# true, false and 'locality' buckets so only the remaining results are
# reported (presumably the rejection reasons -- confirm against the importer).
domains = importer.domains.domains
domains = domains.group_by do |domain|
  importer.valid_domain?(domain, skip_dupe: true)
end
domains.delete(true)
domains.delete(false)
domains.delete('locality')

count = domains.values.flatten.count
importer.logger.info "Calling out #{count} rejected domains"
puts 'Here are the rejected domains and why they were rejected:'
puts YAML.dump(domains)
Version data entries
5 entries across 5 versions & 1 rubygems
Version | Path |
---|---|
gman-7.0.2 | script/reconcile-us |
gman-7.0.1 | script/reconcile-us |
gman-7.0.0 | script/reconcile-us |
gman-6.0.1 | script/reconcile-us |
gman-6.0.0 | script/reconcile-us |