Sha256: 7bcee2f3e3414c69434e3c4f09469e24032da016367db9633d2d216cdd66a4dd

Contents?: true

Size: 1.81 KB

Versions: 2

Compression:

Stored size: 1.81 KB

Contents

require 'janis/version'
require 'janis/parsing'
require 'janis/testing'



# TODO: Sites to be supported for scraping
# http://incloak.es/proxy-list/
# http://spys.ru/free-proxy-list/
# http://www.samair.ru/proxy/
# http://www.proxys.com.ar/

# Entry point for collecting proxy entries from the supported proxy-list
# websites and optionally filtering them.
module Janis

  # Separator between the IP and the port in a scraped "ip:port" string.
  IP_PORT_SEPARATOR = ':'

  # Collects up to +amount+ proxies from the requested websites.
  #
  # Websites are visited in order and parsing stops as soon as at least
  # +amount+ entries have been gathered. The gathered list is truncated to
  # +amount+ entries *before* any criteria are applied, so a filtered result
  # may contain fewer than +amount+ proxies.
  #
  # @param amount [Integer] maximum number of proxies to return
  # @param opts [Hash] optional settings
  # @option opts [Array<Symbol>] :websites subset of {supported_websites} to
  #   scrape; defaults to every supported website
  # @option opts [Object] :criteria handed to Janis::Testing.filter_results
  #   together with the truncated list (its shape is defined by that module)
  # @return [Array<Hash>] hashes with +:ip+, +:port+ and +:source+ keys, or
  #   whatever Janis::Testing.filter_results returns when criteria are given
  # @raise [RuntimeError] if any requested website is not supported
  def self.find(amount, opts = {})
    # Makes sure opts[:websites] is a subset of the supported websites.
    # Otherwise, it takes the whole list.
    if opts[:websites]
      supported = supported_websites
      opts[:websites].each do |website|
        raise "#{website} is not supported!" unless supported.include?(website)
      end
      websites = opts[:websites]
    else
      websites = supported_websites
    end

    total_results = []

    websites.each do |website|
      # Enough entries collected already: skip the remaining websites.
      break if total_results.size >= amount

      entries = Parsing.parse_from(website)
      total_results.concat(entries.map { |entry| build_proxy_hash(entry, website) })
    end

    selected = amount > 0 ? total_results.first(amount) : []

    # The criteria live in the options hash; there is no local named `criteria`.
    criteria = opts[:criteria]
    criteria ? Janis::Testing.filter_results(criteria, selected) : selected
  end

  # Lists the website names derived from every subclass of
  # Janis::Parsing::SpecificParsers::ProxyWebsiteParser.
  #
  # @return [Array<Symbol>] snake_case website names
  def self.supported_websites
    Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses.map do |klass|
      website_name_for(klass.to_s)
    end
  end

  # NOTE: the helpers below are meant for internal use. A bare `private` does
  # not affect methods defined with `def self.`, so they stay publicly callable.

  # Builds the hash describing one proxy.
  #
  # @param proxy_string [String] scraped entry, expected to look like "ip:port"
  # @param website [Symbol] website the entry was scraped from
  # @return [Hash] +:ip+ (text before the first separator), +:port+ (text
  #   after the last separator) and +:source+
  def self.build_proxy_hash(proxy_string, website)
    parts = proxy_string.split(IP_PORT_SEPARATOR)
    {
      ip: parts.first,
      port: parts.last,
      source: website
    }
  end

  # Converts a parser class name to a :symbol_in_snake_case website name,
  # e.g. "Foo::HideMyAssParser" becomes :hide_my_ass.
  #
  # TODO: This should be probably moved to a name helper module
  #
  # @param parser_klass_name [String] fully qualified parser class name
  # @return [Symbol] snake_case website name without the "_Parser" suffix
  def self.website_name_for(parser_klass_name)
    parser_klass_name
      .gsub(/::/, '/')
      .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # split acronym runs: "HTTPServer" -> "HTTP_Server"
      .gsub(/([a-z\d])([A-Z])/, '\1_\2')     # split camel humps: "fooBar" -> "foo_Bar"
      .tr('-', '_')
      .gsub('_Parser', '')
      .split('/')
      .last
      .downcase
      .to_sym
  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
janis-0.1.4 lib/janis.rb
janis-0.1.3 lib/janis.rb