Sha256: 50bd3a567976796cc67ac4826e146383ba069e6aa1f6c857cd799529fda46898
Contents?: true
Size: 980 Bytes
Versions: 8
Compression:
Stored size: 980 Bytes
Contents
#! /usr/bin/env ruby
#
# Vendors the USA.gov-maintained list of US domains into domains.txt
# Source: https://github.com/GSA/govt-urls
#
# Usage: script/vendor-us
#
# Will automatically fetch latest version of the list and merge
# You can check for changes and commit via `git status`
#
# It's also probably a good idea to run `script/ci-build` for good measure

require 'open-uri'
require './lib/gman/importer'

# Group headings whose entries are dropped before the import.
blacklist = ["usagovQUASI"]
source = "https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt"

# Read through the URI object rather than Kernel#open: since Ruby 3.0
# Kernel#open no longer fetches URLs, while OpenURI::OpenRead#read works on
# both old and new Rubies.
data = URI.parse(source).read

# Only the text after the last underscore divider is parsed; whatever precedes
# it is discarded.
data = data.split("__________________________________________________________________________").last.strip
rows = data.split(/\r?\n/).reject(&:empty?)

# Rows that start with a word character are treated as group headings; every
# other row is an entry filed under the most recent heading.
domains = {}
group = nil
rows.each do |row|
  if row =~ /^\w/
    group = row
    domains[group] = []
  elsif group
    # Drop the first ".<TAB>" marker, then trim surrounding whitespace.
    domains[group].push row.sub(".\t", "").strip
  else
    # An entry before any heading has no group to belong to; previously this
    # crashed with NoMethodError on nil, so report it and carry on.
    warn "Skipping row found before any group heading: #{row.inspect}"
  end
end

domains.reject! { |name, _entries| blacklist.include?(name) }
Gman.import(domains)
Version data entries
8 entries across 8 versions & 1 rubygems