Sha256: 0c383565cfadddb87010fb068ca6d553692f03e470d812882e74ab28ec0ff455

Contents?: true

Size: 1.75 KB

Versions: 1

Compression:

Stored size: 1.75 KB

Contents

require 'net/ftp'

# Downloads the tarball at the given URL if a local copy does not exist, if
# the local copy is older than the one at the given URL, or if the local copy
# is corrupt. A tarball that passes its md5 check is extracted with `tar`.
#
# url::     location of the tarball; its checksum is fetched from "<url>.md5".
# retries:: how many times a tarball that fails the md5 check is deleted and
#           downloaded again before giving up (default: 3).
#
# Raises RuntimeError if the tarball still fails verification once all retries
# have been used up.
def download(url, retries: 3)
  file = File.basename(url)
  # Resume an interrupted download or fetch the file for the first time. If
  # the file on the server is newer, then it is downloaded from start.
  sh "wget -Nc #{url}"
  # If the local copy is already fully retrieved, then the previous command
  # ignores the timestamp. So we check with the server again if the file on
  # the server is newer and if so download the new copy.
  sh "wget -N #{url}"

  # Immediately download md5 and verify the tarball. -O overwrites any
  # checksum file left behind by an earlier run; without it wget would save
  # the fresh checksum under a ".1" suffix and the stale one would be checked.
  sh "wget -O #{file}.md5 #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
    if matched
      sh "tar xvf #{file}"
    elsif retries > 0
      # -f: the checksum file may be missing or empty if its download failed.
      sh "rm -f #{file} #{file}.md5"
      download(url, retries: retries - 1)
    else
      # Give up instead of re-downloading forever when the server copy (or
      # its checksum) is persistently broken.
      raise "md5 verification of #{file} failed; giving up after repeated downloads"
    end
  end
end

# Lists the BLAST database directory on NCBI's FTP server and returns the
# location of every database volume (entries ending in ".tar.gz"), grouped by
# database name. The name is the part of the file name before the first dot:
#
#     {'nr' => ['ftp.ncbi.nlm.nih.gov/...', ...], 'nt' => [...], ...}
#
# Each location is the host name joined with the entry as reported by the
# server; no URL scheme is prepended. Login is anonymous, with the value of
# the `email` environment variable passed as the password.
def databases
  host = 'ftp.ncbi.nlm.nih.gov'
  dir  = 'blast/db'
  usr  = 'anonymous'
  pswd = ENV['email']

  Net::FTP.open(host, usr, pswd) do |ftp|
    ftp.passive = true

    # Keep only the tarballs, each prefixed with the host name.
    tarballs = ftp.nlst(dir).each_with_object([]) do |entry, kept|
      location = File.join(host, entry)
      kept << location if location =~ /\.tar\.gz$/
    end

    tarballs.group_by { |location| File.basename(location).split('.').first }
  end
end

# Query the server once while the tasks are being defined and reuse the
# listing below, so the default task does not open a second FTP session.
available = databases

# Create user-facing task for each database to drive the download of its
# volumes in parallel: every volume gets its own task, and the database's
# multitask lists all of them as prerequisites.
available.each do |name, files|
  volumes = files.map { |file| task(file) { download(file) } }
  multitask(name => volumes)
end

# List name of all databases that can be downloaded if executed without
# any arguments.
task :default do
  puts available.keys.join(', ')
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
ncbi-blast-dbs-0.0.5 lib/ncbi-blast-dbs.rake