Sha256: d337d9c18116fe0c04d724eea532f300b3faa03a562a9e37983f5eea1892122d

Contents?: true

Size: 1.89 KB

Versions: 2

Compression:

Stored size: 1.89 KB

Contents

require 'nokogiri';
require_relative 'parsing_tools/capybara_with_phantom_js'

module Janis

  module Parsing

    module SpecificParsers
      # Scrapes proxy entries from http://proxy-list.org using a Capybara
      # session (obtained through the CapybaraWithPhantomJs mixin) and parses
      # each rendered page with Nokogiri.
      class ProxyListOrgParser < ProxyWebsiteParser

        include CapybaraWithPhantomJs

        # Page links clicked by #parse after the initially visited page.
        PAGE_NUMBERS_AFTER_FIRST = (2..10)

        # Passing a String name registers the struct as Struct::Row (a constant
        # under Struct, not under this class). One instance holds the text
        # scraped for a single listing row.
        Struct.new('Row', :proxy, :country, :city, :type, :speed, :https_ssl)

        # @return [String] address of the website this parser scrapes
        def self.url
          'http://proxy-list.org'
        end

        # Configures Capybara, opens a new session, visits the site and parses
        # the first page so that #rows can be called right away.
        #
        # NOTE(review): `url` is called on the instance here and in
        # #configure_capybara, while this class only defines `self.url`. The
        # instance-level method is presumably provided by ProxyWebsiteParser or
        # CapybaraWithPhantomJs -- confirm.
        def initialize
          super
          configure_capybara
          @session = new_session
          @session.visit(url)
          obtain_html_doc
        end

        # Sets Capybara's app_host to this parser's url.
        def configure_capybara
          Capybara.configure { |c| c.app_host = url }
        end

        # Collects the rows of the currently loaded page, then clicks through
        # the links labelled "2" to "10", collecting the rows of each page.
        #
        # @return [Array<String>] the `proxy` field of every collected row
        def parse
          total_rows = rows
          PAGE_NUMBERS_AFTER_FIRST.each do |page_number|
            @session.click_link(page_number.to_s)
            obtain_html_doc
            total_rows += rows
          end

          #TODO: This map is here to adapt #parse output to the one expected by Janis.find. Remove this when it starts accepting
          #more info about each proxy server.
          total_rows.map(&:proxy)
        end

        private

        # Re-parses the session's current HTML into @html_doc.
        def obtain_html_doc
          @html_doc = Nokogiri.HTML(@session.html)
        end

        # Builds one Struct::Row per matching <ul> element of @html_doc.
        #
        # @return [Array<Struct::Row>] rows found on the currently parsed page
        def rows
          # Keep only the <ul> elements whose serialized HTML contains three
          # digits followed by a dot (presumably the start of an IP address --
          # confirm against the site's markup).
          rows_in_html = @html_doc.css('ul').select { |ul| ul.to_s.match(/\d\d\d\./) }

          rows_in_html.map do |row_html|
            #TODO: This should be an actual class, and should have methods to retrieve all attributes.
            Struct::Row.new(
              row_html.css('.proxy').children.last.text,
              row_html.css('.country').text,
              row_html.css('.city').text,
              row_html.css('.type').text,
              row_html.css('.speed').text,
              row_html.css('.https').text
            )
          end
        end
      end
    end

  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
janis-0.1.4 lib/janis/specific_parsers/proxy-list_org.rb
janis-0.1.3 lib/janis/specific_parsers/proxy-list_org.rb