Sha256: 50bd3a567976796cc67ac4826e146383ba069e6aa1f6c857cd799529fda46898

Contents?: true

Size: 980 Bytes

Versions: 8

Compression:

Stored size: 980 Bytes

Contents

#! /usr/bin/env ruby
#
# Vendors the USA.gov-maintained list of US domains into domains.txt
# Source: https://github.com/GSA/govt-urls
#
# Usage: script/vendor-us
#
# Automatically fetches the latest version of the list and merges it in.
# Review the resulting changes with `git status`, then commit them.
#
# It's also probably a good idea to run `script/ci-build` for good measure

require 'open-uri'
require './lib/gman/importer'

# Group headings whose domains are dropped before the import.
blacklist = ["usagovQUASI"]
source = "https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt"

# Fetch through open-uri's URI#read instead of Kernel#open: Kernel#open only
# handles URLs when open-uri has patched it (and not at all on Ruby 3.0+),
# and it would interpret a string starting with "|" as a command to run.
data = URI.parse(source).read

# Keep only the text after the last horizontal-rule separator in the file.
data = data.split("__________________________________________________________________________")
data = data.last.strip

# Drop blank lines, including whitespace-only ones, so they never end up
# being recorded as empty domain entries.
rows = data.split(/\r?\n/).reject { |r| r.strip.empty? }

# Build a hash of group heading => list of domains. A row that starts with a
# word character opens a new group; every other row is an entry belonging to
# the most recently opened group.
domains = {}
group = nil
rows.each do |row|
  if row =~ /^\w/
    group = row
    domains[group] = []
  elsif group
    # "\.\t" is the two-character literal "." + TAB (String#sub with a String
    # pattern matches literally); strip that prefix and surrounding whitespace.
    domains[group].push row.sub("\.\t", "").strip
  else
    # Fail with a clear message rather than a NoMethodError on nil when the
    # list does not begin with a group heading.
    raise "Unexpected list format: entry #{row.inspect} appears before any group heading"
  end
end

domains.reject! { |name, _entries| blacklist.include?(name) }
Gman.import(domains)

Version data entries

8 entries across 8 versions & 1 rubygems

Version Path
gman-5.0.9 script/vendor-us
gman-5.0.8 script/vendor-us
gman-5.0.7 script/vendor-us
gman-5.0.6 script/vendor-us
gman-5.0.5 script/vendor-us
gman-5.0.4 script/vendor-us
gman-5.0.3 script/vendor-us
gman-5.0.2 script/vendor-us