Sha256: 0b9557abe51992fec07ceba4b398d5cbe87ce054758cf6fe6758c47d98871143

Contents?: true

Size: 1.3 KB

Versions: 1

Compression:

Stored size: 1.3 KB

Contents

#!/usr/bin/env ruby
# encoding:UTF-8

require 'open-uri'
require 'shellwords'
require 'configliere'

# Root of the NOAA archive; the listing for a year is fetched from a
# sub-path named after that year.
NOAA_URL = 'http://www1.ncdc.noaa.gov/pub/data/noaa/'.freeze

# Let configliere read overrides from the command line.
Settings.use :commandline

# Defaults, used for any option not supplied by the caller.
Settings({
  years: [1901],
  verbose: false,
  out_dir: '/data/rawd/noaa/isd/',
  un_gzip: false,
})

# Declare each option with its single-letter flag and help text.
Settings.define :years, flag: 'y', description: "Years to download"
Settings.define :verbose, flag: 'v', description: "Get chatty", type: :boolean
Settings.define :un_gzip, flag: 'g', description: "Unzip the files as they are uploaded", type: :boolean
Settings.define :out_dir, flag: 'o', description: "The directory in the hdfs to put the files"

# Merge defaults with the command-line values; must run before any
# Settings[...] lookup below.
Settings.resolve!

# List the gzipped archives that NOAA publishes for one year.
#
# Fetches the listing page at NOAA_URL/<year> and collects the target of
# every `<a href="...gz">` link found on it, in page order.
#
# @param year [Integer, String] the year whose listing should be read
# @return [Array<String>] the linked `.gz` file names (empty if none matched)
# @raise [OpenURI::HTTPError] if the listing page cannot be fetched
def get_files_for_year(year)
  # NOAA_URL may already end in a slash; avoid producing "noaa//1901".
  listing_url = "#{NOAA_URL.chomp('/')}/#{year}"

  # URI#open (mixed in by open-uri) works on every Ruby version, whereas
  # Kernel#open stopped accepting URLs in Ruby 3.0. The block form closes
  # the connection as soon as the page has been scanned.
  URI.parse(listing_url).open do |year_page|
    # String#[] with a capture index yields nil for non-matching lines,
    # which compact then drops.
    year_page.each_line.map { |line| line[/<a href="([^.]*\.gz)">/, 1] }.compact
  end
end

# Stream every archive of every requested year from NOAA into HDFS through
# `hdp-put`, gunzipping in flight when Settings[:un_gzip] is set.

# The requested years come from Settings (there is no `years` local).
# The default is an Array; a command-line override presumably arrives as a
# String (TODO confirm), so accept a scalar or a comma-separated list too.
requested_years = Array(Settings[:years]).flat_map { |y| y.to_s.split(',') }.map(&:strip)

# Trim trailing slashes so joined paths do not contain "//".
base_url = NOAA_URL.chomp('/')
out_dir  = Settings[:out_dir].to_s.chomp('/')

requested_years.each do |year|
  puts "Uploading files for year #{year}..." if Settings[:verbose]
  get_files_for_year(year).each do |file|
    puts "  Uploading #{file}..." if Settings[:verbose]

    # File names are scraped from a remote page, so escape everything that
    # is interpolated into the shell command line.
    source = Shellwords.escape("#{base_url}/#{year}/#{file}")
    target = Shellwords.escape("#{out_dir}/#{year}/#{file}")

    # Both modes download the full URL; they differ only in the zcat stage.
    # NOTE(review): the target keeps its .gz suffix even when un_gzip is on,
    # as in the original — confirm whether that is intended.
    stages = ["curl #{source}"]
    stages << 'zcat' if Settings[:un_gzip]
    stages << "hdp-put #{target}"

    # system reports whether the pipeline's last stage succeeded, so a
    # failed upload is reported instead of silently ignored.
    warn "  Failed to upload #{file}" unless system(stages.join(' | '))
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
wukong-3.0.0.pre2 examples/munging/weather/utils/noaa_downloader.rb