lib/rawler/base.rb in rawler-0.0.4 vs lib/rawler/base.rb in rawler-0.0.5

- old
+ new

@@ -20,10 +20,12 @@ private
     def validate_links_in_page(current_url)
       Rawler::Crawler.new(current_url).links.each do |page_url|
         validate_page(page_url)
+        # Todo: include this in a configuration option
+        sleep(3)
       end
     end
     def validate_page(page_url)
       if not_yet_parsed?(page_url)
@@ -37,16 +39,16 @@
       write("#{response.code} - #{link}")
       responses[link] = { :status => response.code.to_i }
     rescue Errno::ECONNREFUSED
       write("Connection refused - '#{link}'")
-    rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
+    rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
       write("Connection problems - '#{link}'")
     end
     def same_domain?(link)
-      URI.parse(URI.encode(Rawler.url)).host == URI.parse(URI.encode(link)).host
+      URI.parse(Rawler.url).host == URI.parse(link).host
     end
     def not_yet_parsed?(link)
       responses[link].nil?
     end
\ No newline at end of file