Sha256: a2068d4217dba2cf81f35cd436c189a60844afa0c60818e2148cf0111b2498fa

Contents?: true

Size: 970 Bytes

Versions: 2

Compression:

Stored size: 970 Bytes

Contents

#! /usr/bin/env ruby
# frozen_string_literal: true

#
# Vendors the USA.gov-maintained list of US domains into domains.txt
# Source: https://github.com/GSA/govt-urls
#
# Usage: script/vendor-us
#
# Will automatically fetch latest version of the list and merge
# You can check for changes and commit via `git status`
#
# It's also probably a good idea to run `script/ci-build` for good measure

require './lib/gman'
require 'open-uri'

# Group headings from the upstream list that are dropped before importing.
blacklist = %w[usagovQUASI usagovFEDgov]
source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'

# Block form: the handle opened by URI.open is closed as soon as the body is read.
data = URI.open(source, &:read)
# Keep only the text after the last 74-underscore separator line,
# then break it into its non-empty lines.
data = data.split('_' * 74)
data = data.last.strip
data = data.split(/\r?\n/).reject(&:empty?)

# Maps each group heading to the list of entries that follow it.
domains = {}
group = nil
data.each do |row|
  if /^\w/.match?(row)
    # A row starting with a word character opens a new group.
    group = row
    # `||=` keeps entries already collected should the same heading appear twice.
    domains[group] ||= []
  else
    # Fail with a readable message rather than a NoMethodError on nil when
    # the list does not start with a group heading.
    raise "Found entry #{row.inspect} before any group heading in #{source}" if group.nil?

    # Drop the ".<TAB>" marker from the row and trim surrounding whitespace.
    domains[group].push row.sub(".\t", '').strip
  end
end

domains.reject! { |g, _| blacklist.include?(g) }
Gman::Importer.new(domains).import

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
gman-7.0.5 script/vendor-us
gman-7.0.4 script/vendor-us