Sha256: 0b9557abe51992fec07ceba4b398d5cbe87ce054758cf6fe6758c47d98871143
Contents?: true
Size: 1.3 KB
Versions: 1
Compression:
Stored size: 1.3 KB
Contents
#!/usr/bin/env ruby
# encoding:UTF-8

# Scrapes the NOAA listing page for each requested year, then streams every
# linked .gz file through curl into `hdp-put` (optionally through zcat first).

require 'open-uri'
require 'shellwords'
require 'configliere'

NOAA_URL = 'http://www1.ncdc.noaa.gov/pub/data/noaa/'

# Matches a listing-page anchor pointing at a .gz file; capture 1 is the file name.
GZ_LINK = /<a href="([^.]*\.gz)">/

Settings.use :commandline
Settings({
  years:   [1901],
  verbose: false,
  out_dir: '/data/rawd/noaa/isd/',
  un_gzip: false,
})

Settings.define :years,   flag: 'y', description: "Years to download"
Settings.define :verbose, flag: 'v', description: "Get chatty", type: :boolean
Settings.define :un_gzip, flag: 'g', description: "Unzip the files as they are uploaded", type: :boolean
Settings.define :out_dir, flag: 'o', description: "The directory in the hdfs to put the files"
Settings.resolve!

# Builds a URL below NOAA_URL from the given path segments.
# NOAA_URL ends with a slash, so it is trimmed before joining to avoid '//'.
def noaa_url(*segments)
  [NOAA_URL.chomp('/'), *segments].join('/')
end

# Returns the names of the .gz files linked from the listing page of +year+.
#
# @param year [Integer, String] the year whose listing page is fetched
# @return [Array<String>] file names, at most one per line of the page
def get_files_for_year(year)
  # URI#open (provided by open-uri) works where Kernel#open no longer accepts
  # URLs, and the block form closes the handle once the page has been read.
  URI.parse(noaa_url(year)).open do |year_page|
    year_page.each_line.map { |line| line[GZ_LINK, 1] }.compact
  end
end

# NOTE(review): assumes a value given on the command line may arrive as a
# single comma-separated string rather than an Array -- confirm against configliere.
requested_years = Array(Settings[:years]).flat_map { |year| year.to_s.split(',') }

requested_years.each do |year|
  puts "Uploading files for year #{year}..." if Settings[:verbose]

  get_files_for_year(year).each do |file|
    puts " Uploading #{file}..." if Settings[:verbose]

    # File names come from a remote HTML page, so escape everything that is
    # interpolated into the shell command line.
    source = Shellwords.escape(noaa_url(year, file))
    target = Shellwords.escape(File.join(Settings[:out_dir].to_s, year.to_s, file))

    if Settings[:un_gzip]
      `curl #{source} | zcat | hdp-put #{target}`
    else
      `curl #{source} | hdp-put #{target}`
    end

    # The pipeline's status is that of its last command.
    warn "Upload of #{file} for year #{year} failed" unless $?.success?
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
wukong-3.0.0.pre2 | examples/munging/weather/utils/noaa_downloader.rb |