lib/bio/io/ncbirest.rb in bio-1.3.0 vs lib/bio/io/ncbirest.rb in bio-1.3.1
- old
+ new
@@ -24,12 +24,14 @@
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
# weekdays for any series of more than 100 requests.
# -> Not implemented yet in BioRuby
- # Make no more than one request every 3 seconds.
- NCBI_INTERVAL = 3
+ # Make no more than one request every 1 seconds.
+ # (NCBI's restriction is "Make no more than 3 requests every 1 second.",
+ # but limited to 1/sec partly because of keeping the value in integer.)
+ NCBI_INTERVAL = 1
@@last_access = nil
private
def ncbi_access_wait(wait = NCBI_INTERVAL)
@@ -125,14 +127,14 @@
# * "subs": Subset [SB]
# * _reldate_: 365
# * _mindate_: 2001
# * _maxdate_: 2002/01/01
# * _datetype_: "edat"
- # * _limit_: maximum number of entries to be returned (0 for unlimited)
+ # * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
# * _step_: maximum number of entries retrieved at a time
# *Returns*:: array of entry IDs or a number of results
- def esearch(str, hash = {}, limit = 100, step = 10000)
+ def esearch(str, hash = {}, limit = nil, step = 10000)
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
"tool" => "bioruby",
"term" => str,
}
@@ -141,15 +143,20 @@
case opts["rettype"]
when "count"
count = esearch_count(str, opts)
return count
else
+ retstart = 0
+ retstart = hash["retstart"].to_i if hash["retstart"]
+
+ limit ||= hash["retmax"].to_i if hash["retmax"]
+ limit ||= 100 # default limit is 100
limit = esearch_count(str, opts) if limit == 0 # unlimit
list = []
0.step(limit, step) do |i|
retmax = [step, limit - i].min
- opts.update("retmax" => retmax, "retstart" => i)
+ opts.update("retmax" => retmax, "retstart" => i + retstart)
ncbi_access_wait
response = Bio::Command.post_form(serv, opts)
result = response.body
list += result.scan(/<Id>(.*?)<\/Id>/m).flatten
end