lib/twitterscraper/query.rb in twitterscraper-ruby-0.15.2 vs lib/twitterscraper/query.rb in twitterscraper-ruby-0.16.0

- old
+ new

@@ -67,11 +67,10 @@
         json_resp = nil
         items_html = text
       else
         json_resp = JSON.parse(text)
         items_html = json_resp['items_html'] || ''
-        logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
       end

       [items_html, json_resp]
     end

@@ -98,10 +97,16 @@
       end
       return [], nil if response.nil? || response.empty?

       html, json_resp = parse_single_page(response, pos.nil?)

+      if json_resp && json_resp['message']
+        logger.warn json_resp['message'] # Sorry, you are rate limited.
+        @stop_requested = true
+        Cache.new.delete(url) if cache_enabled?
+      end
+
       tweets = Tweet.from_html(html)

       if tweets.empty?
         return [], (json_resp && json_resp['has_more_items'] && json_resp['min_position'])
       end
@@ -138,23 +143,31 @@
       if start_date
         if start_date < OLDEST_DATE
           raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
         end
       end
-
-      if end_date
-        today = Date.today
-        if end_date > Date.today
-          raise Error.new(":end_date must be less than or equal to today(#{today})")
-        end
-      end
     end

     def build_queries(query, start_date, end_date)
       if start_date && end_date
-        date_range = start_date.upto(end_date - 1)
-        date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
+        # date_range = start_date.upto(end_date - 1)
+        # date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
+
+        queries = []
+        time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
+        end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
+
+        while true
+          if time < Time.now.utc
+            queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
+          end
+          time += 3600
+          break if time >= end_time
+        end
+
+        queries
+
       elsif start_date
         [query + " since:#{start_date}"]
       elsif end_date
         [query + " until:#{end_date}"]
       else
@@ -200,21 +213,19 @@
       start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
       end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)

       queries = build_queries(query, start_date, end_date)
       type = Type.new(type)
       if threads > queries.size
-        logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
         threads = queries.size
       end
       if proxy_enabled?
         proxies = Proxy::Pool.new
         logger.debug "Fetch #{proxies.size} proxies"
       else
         proxies = []
         logger.debug 'Proxy disabled'
       end
       logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
-
       validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)

       logger.info "The number of threads #{threads}"