require 'open-uri'

module Janis
  module Parsing
    module SpecificParsers
      # Abstract base class for parsers that scrape proxy lists from a website.
      #
      # Concrete parsers subclass this class, override +self.url+ with the
      # location of the page to scrape (an "http://", "https://" or "file://"
      # URL) and override +parse+ to extract the proxies from the document
      # returned by +obtain_html_doc+. Every subclass is recorded automatically
      # and can be listed through +ProxyWebsiteParser.subclasses+.
      class ProxyWebsiteParser
        attr_reader :url

        # Registry of every class that inherits from ProxyWebsiteParser.
        @@subclasses = []

        # Returns the array of registered subclasses (the live array, not a copy).
        def self.subclasses
          @@subclasses
        end

        # Ruby hook invoked whenever a class inherits from this one; records
        # the new subclass in the registry.
        def self.inherited(subclass)
          super
          add_subclass(subclass)
        end

        # Appends +subclass+ to the registry.
        #
        # NOTE: the original definition was placed below the `private` keyword,
        # which does not affect `def self.` methods, so this has always been a
        # public class method. It is kept public to avoid breaking callers.
        def self.add_subclass(subclass)
          @@subclasses << subclass
        end

        # The URL this parser reads from. Subclasses must override it.
        #
        # Raises RuntimeError when called on a class that did not override it.
        def self.url
          raise "Subclass Responsibility!"
        end

        # Captures the class-level URL in the instance's +url+ reader.
        def initialize
          @url = self.class.url
        end

        # It should return an array of strings. Each string should have the
        # format "IP:PORT".
        #
        # Raises RuntimeError unless overridden by the subclass.
        def parse
          raise "Subclass Responsibility!"
        end

        private

        # Fetches the content behind +url+ and hands it to Nokogiri::HTML.
        #
        # Raises RuntimeError when the URL scheme is not http, https or file.
        def obtain_html_doc
          # Match on the scheme prefix: a substring test would send a file://
          # path that merely contains "http://" down the HTTP branch.
          if url.start_with?("http://", "https://")
            Nokogiri::HTML(get_content_by_http(url))
          elsif url.start_with?("file://")
            Nokogiri::HTML(read_content_from_file(url))
          else
            raise "#{url} is not a supported URL!"
          end
        end

        # Opens +url+ over HTTP(S) and returns the IO-like object produced by
        # open-uri.
        #
        # URI#open is used instead of Kernel#open: Kernel#open stopped handling
        # URLs in Ruby 3.0 and would execute a command for strings starting
        # with "|".
        def get_content_by_http(url)
          URI.parse(url).open
        end

        # Returns the content of the local file referenced by a "file://" URL.
        #
        # Only the leading scheme is stripped, and File.read closes the file
        # handle once the content has been read.
        def read_content_from_file(url)
          File.read(url.sub(/\Afile:\/\//, ''))
        end
      end
    end
  end
end