lib/ncbi-blast-dbs.rake in ncbi-blast-dbs-0.0.4 vs lib/ncbi-blast-dbs.rake in ncbi-blast-dbs-0.0.5
- old
+ new
@@ -2,28 +2,34 @@
# Downloads tarball at the given URL if a local copy does not exist, or if the
# local copy is older than the one at the given URL, or if it is corrupt.
def download(url)
file = File.basename(url)
- # Download tarball if the local copy is older than at the given URL or fetch
- # it for the first time.
+ # Resume an interrupted download or fetch the file for the first time. If
+ # the file on the server is newer, then it is downloaded from the start.
+ sh "wget -Nc #{url}"
+ # If the local copy is already fully retrieved, then the previous command
+ # ignores the timestamp. So we check with the server again if the file on
+ # the server is newer and if so download the new copy.
sh "wget -N #{url}"
- # Resume aborted download. Do nothing if the file is already fully retrieved
- # (at the cost is a round trip to server).
- sh "wget -c #{url}"
- # Always download md5 and verify the tarball. Re-download tarball if corrupt;
- # extract otherwise.
+ # Immediately download md5 and verify the tarball. Re-download tarball if
+ # corrupt; extract otherwise.
sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
if !matched
sh "rm #{file} #{file}.md5"; download(url)
else
sh "tar xvf #{file}"
end
end
end
+# Connects to NCBI's FTP server, gets the URLs of all database volumes and
+# returns them grouped by database name:
+#
+# {'nr' => ['ftp://...', ...], 'nt' => [...], ...}
+#
def databases
host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
usr, pswd = 'anonymous', ENV['email']
Net::FTP.open(host, usr, pswd) do |con|
@@ -33,12 +39,16 @@
select { |file| file.match(/\.tar\.gz$/) }.
group_by { |file| File.basename(file).split('.')[0] }
end
end
+# Create a user-facing task for each database to drive the download of its
+# volumes in parallel.
databases.each do |name, files|
multitask(name => files.map { |file| task(file) { download(file) } })
end
+# List the names of all databases that can be downloaded if executed without
+# any arguments.
task :default do
puts databases.keys.join(', ')
end