lib/twitterscraper/query.rb in twitterscraper-ruby-0.15.2 vs lib/twitterscraper/query.rb in twitterscraper-ruby-0.16.0
- old
+ new
@@ -67,11 +67,10 @@
json_resp = nil
items_html = text
else
json_resp = JSON.parse(text)
items_html = json_resp['items_html'] || ''
- logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
end
[items_html, json_resp]
end
@@ -98,10 +97,16 @@
end
return [], nil if response.nil? || response.empty?
html, json_resp = parse_single_page(response, pos.nil?)
+ if json_resp && json_resp['message']
+ logger.warn json_resp['message'] # Sorry, you are rate limited.
+ @stop_requested = true
+ Cache.new.delete(url) if cache_enabled?
+ end
+
tweets = Tweet.from_html(html)
if tweets.empty?
return [], (json_resp && json_resp['has_more_items'] && json_resp['min_position'])
end
@@ -138,23 +143,31 @@
if start_date
if start_date < OLDEST_DATE
raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
end
end
-
- if end_date
- today = Date.today
- if end_date > Date.today
- raise Error.new(":end_date must be less than or equal to today(#{today})")
- end
- end
end
def build_queries(query, start_date, end_date)
if start_date && end_date
- date_range = start_date.upto(end_date - 1)
- date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
+ # date_range = start_date.upto(end_date - 1)
+ # date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
+
+ queries = []
+ time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
+ end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
+
+ while true
+ if time < Time.now.utc
+ queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
+ end
+ time += 3600
+ break if time >= end_time
+ end
+
+ queries
+
elsif start_date
[query + " since:#{start_date}"]
elsif end_date
[query + " until:#{end_date}"]
else
@@ -200,21 +213,19 @@
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
queries = build_queries(query, start_date, end_date)
type = Type.new(type)
if threads > queries.size
- logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
threads = queries.size
end
if proxy_enabled?
proxies = Proxy::Pool.new
logger.debug "Fetch #{proxies.size} proxies"
else
proxies = []
logger.debug 'Proxy disabled'
end
logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
-
validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
logger.info "The number of threads #{threads}"