proxy.rb in twitterscraper-ruby-0.4.0

- old
+ new
@@ -4,25 +4,54 @@
     PROXY_URL = 'https://free-proxy-list.net/'
 
     class RetryExhausted < StandardError
     end
 
+    class Result
+      def initialize(items)
+        @items = items.shuffle
+        @cur_index = 0
+      end
+
+      def sample
+        if @cur_index >= @items.size
+          reload
+        end
+        @cur_index += 1
+        @items[@cur_index - 1]
+      end
+
+      def size
+        @items.size
+      end
+
+      private
+
+      def reload
+        @items = Proxy.get_proxies.shuffle
+        @cur_index = 0
+        Twitterscraper.logger.debug "Reload #{proxies.size} proxies"
+      end
+    end
+
     module_function
 
     def get_proxies(retries = 3)
       response = Twitterscraper::Http.get(PROXY_URL)
       html = Nokogiri::HTML(response)
-      table = html.xpath('//*[@id="proxylisttable"]').first
+      table = html.xpath('//table[@id="proxylisttable"]').first
 
       proxies = []
 
       table.xpath('tbody/tr').each do |tr|
         cells = tr.xpath('td')
-        ip, port = cells[0].text.strip, cells[1].text.strip
+        ip, port, https = [0, 1, 6].map { |i| cells[i].text.strip }
+        next if https == 'no'
         proxies << ip + ':' + port
       end
 
-      proxies
+      Twitterscraper.logger.debug "Fetch #{proxies.size} proxies"
+      Result.new(proxies)
     rescue => e
       if (retries -= 1) > 0
         retry
       else
         raise RetryExhausted.new(e.inspect)