Sha256: 2581afb9f7355c2773d372ee42e4e12471fc7d14dcc90a4d259f7710904295d3
Contents?: true
Size: 1.42 KB
Versions: 2
Compression:
Stored size: 1.42 KB
Contents
module Rawler class Base attr_accessor :url, :responses def initialize(url, output) @url = url @responses = {} $output = output end def validate validate_links_in_page(url) end private def validate_links_in_page(current_url) Rawler::Crawler.new(current_url).links.each do |page_url| validate_page(page_url) end end def validate_page(page_url) if not_yet_parsed?(page_url) add_status_code(page_url) validate_links_in_page(page_url) if same_domain?(page_url) end end def add_status_code(link) uri = URI.parse(link) response = nil Net::HTTP.start(uri.host, uri.port) do |http| path = (uri.path.size == 0) ? "/" : uri.path response = http.head(path, {'User-Agent'=>'Rawler'}) end $output.puts("#{response.code} - #{link}") responses[link] = { :status => response.code.to_i } rescue Errno::ECONNREFUSED puts "Connection refused - '#{link}'" rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError puts "Connection problems - #{link}" end def same_domain?(link) URI.parse(url).host == URI.parse(link).host end def not_yet_parsed?(link) responses[link].nil? end end end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
rawler-0.0.2 | lib/rawler/base.rb |
rawler-0.0.1 | lib/rawler/base.rb |