lib/twitterscraper/proxy.rb in twitterscraper-ruby-0.3.0 vs lib/twitterscraper/proxy.rb in twitterscraper-ruby-0.4.0
- old
+ new
@@ -4,25 +4,54 @@
# Page whose proxy table get_proxies downloads and parses.
PROXY_URL = 'https://free-proxy-list.net/'
# Raised by get_proxies once its retry budget is used up; the message is the
# inspected form of the last underlying error.
class RetryExhausted < StandardError; end
# Pool of proxy entries that hands them out one at a time in shuffled order
# and fetches a fresh list from Proxy.get_proxies once every entry is used.
class Result
  # @param items [Array] proxy entries (get_proxies passes "ip:port" strings);
  #   a shuffled copy is stored, the caller's array is not modified
  def initialize(items)
    @items = items.shuffle
    @cur_index = 0
  end

  # Returns the next entry, reloading the list first when it is exhausted.
  #
  # @return [Object, nil] the next entry, or nil when the list is still empty
  #   after a reload
  def sample
    reload if @cur_index >= @items.size
    item = @items[@cur_index]
    @cur_index += 1
    item
  end

  # @return [Integer] number of entries currently held
  def size
    @items.size
  end

  protected

  # Visible to other Result instances only, so #reload can adopt the entries
  # of a freshly fetched Result.
  attr_reader :items

  private

  # Replaces the exhausted list with a newly fetched one and rewinds the cursor.
  def reload
    fetched = Proxy.get_proxies
    # get_proxies wraps its list in a Result, which has no #shuffle; unwrap it
    # while still accepting a plain Array.
    fetched = fetched.items if fetched.is_a?(Result)
    @items = fetched.shuffle
    @cur_index = 0
    # Log the new pool size (`proxies` is not defined in this scope).
    Twitterscraper.logger.debug "Reload #{size} proxies"
  end
end
+
module_function
def get_proxies(retries = 3)
response = Twitterscraper::Http.get(PROXY_URL)
html = Nokogiri::HTML(response)
- table = html.xpath('//*[@id="proxylisttable"]').first
+ table = html.xpath('//table[@id="proxylisttable"]').first
proxies = []
table.xpath('tbody/tr').each do |tr|
cells = tr.xpath('td')
- ip, port = cells[0].text.strip, cells[1].text.strip
+ ip, port, https = [0, 1, 6].map { |i| cells[i].text.strip }
+ next if https == 'no'
proxies << ip + ':' + port
end
- proxies
+ Twitterscraper.logger.debug "Fetch #{proxies.size} proxies"
+ Result.new(proxies)
rescue => e
if (retries -= 1) > 0
retry
else
raise RetryExhausted.new(e.inspect)