Sha256: 7bcee2f3e3414c69434e3c4f09469e24032da016367db9633d2d216cdd66a4dd
Contents?: true
Size: 1.81 KB
Versions: 2
Compression:
Stored size: 1.81 KB
Contents
require 'janis/version'
require 'janis/parsing'
require 'janis/testing'

# TODO: Sites to be supported for scraping
# http://incloak.es/proxy-list/
# http://spys.ru/free-proxy-list/
# http://www.samair.ru/proxy/
# http://www.proxys.com.ar/

# Top-level namespace of the gem. Collects proxy entries from the supported
# websites and returns them as hashes of the form
# { ip: ..., port: ..., source: ... }.
module Janis
  # Separator between the IP and the port in a raw proxy string.
  IP_PORT_SEPARATOR = ':'.freeze

  # Gathers proxies from the selected websites and returns at most +amount+.
  #
  # Websites are queried in order, and querying stops as soon as at least
  # +amount+ entries have been collected.
  #
  # @param amount [Integer] maximum number of proxies to return
  # @param opts [Hash] optional settings
  # @option opts [Array<Symbol>] :websites subset of Janis.supported_websites
  #   to query; when absent, every supported website is used
  # @option opts :criteria when present, the selected proxies are passed
  #   through Janis::Testing.filter_results together with this value
  #   (NOTE(review): the expected shape of the criteria is defined by
  #   Janis::Testing -- confirm there)
  # @return [Array<Hash>] proxy hashes, or whatever
  #   Janis::Testing.filter_results returns when :criteria is given
  # @raise [RuntimeError] if any requested website is not supported
  def self.find(amount, opts = {})
    requested = opts[:websites]

    if requested
      # Look the supported list up once instead of once per requested website.
      supported = Janis.supported_websites
      requested.each do |website|
        raise "#{website} is not supported!" unless supported.include?(website)
      end
      websites = requested
    else
      websites = Janis.supported_websites
    end

    total_results = []
    websites.each do |website|
      break if total_results.size >= amount

      parsed = Parsing.parse_from(website)
      total_results.concat(parsed.map { |entry| build_proxy_hash(entry, website) })
    end

    selected = total_results[0...amount]

    # The original referenced an undefined local `criteria` here, which raised
    # NameError whenever opts[:criteria] was supplied.
    criteria = opts[:criteria]
    criteria ? Janis::Testing.filter_results(criteria, selected) : selected
  end

  # Lists the website names that can be passed in opts[:websites].
  #
  # The names are derived from the class names of the ProxyWebsiteParser
  # subclasses via website_name_for.
  #
  # @return [Array<Symbol>]
  def self.supported_websites
    Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses.map do |klass|
      website_name_for(klass.to_s)
    end
  end

  # NOTE: a bare `private` does not apply to methods defined with `def self.`,
  # so the helpers below remain publicly callable. This is left unchanged so
  # that existing callers keep working.
  private

  # Builds the proxy hash for one raw proxy string.
  #
  # The IP is the first and the port the last element after splitting on
  # IP_PORT_SEPARATOR.
  #
  # @param proxy_string [String] raw entry, presumably "ip:port"
  # @param website the source the entry was obtained from
  # @return [Hash] { ip:, port:, source: }
  def self.build_proxy_hash(proxy_string, website)
    parts = proxy_string.split(IP_PORT_SEPARATOR)

    {
      ip: parts.first,
      port: parts.last,
      source: website
    }
  end

  # Converts a parser class name to a :symbol_in_snake_case website name.
  #
  # For example, a class named "A::B::FooBarParser" yields :foo_bar.
  #
  # TODO: This should be probably moved to a name helper module
  #
  # @param parser_klass_name [String] class name, possibly namespaced
  # @return [Symbol]
  def self.website_name_for(parser_klass_name)
    parser_klass_name
      .gsub(/::/, '/')
      .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
      .gsub(/([a-z\d])([A-Z])/, '\1_\2')
      .tr('-', '_')
      .gsub('_Parser', '')
      .split('/')
      .last
      .downcase
      .to_sym
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
janis-0.1.4 | lib/janis.rb |
janis-0.1.3 | lib/janis.rb |