lib/rawler/base.rb in rawler-0.1.5 vs lib/rawler/base.rb in rawler-0.1.6

- old
+ new

@@ -1,8 +1,10 @@
 module Rawler
   class Base

+    DEFAULT_LOGFILE = "rawler_log.txt"
+
     attr_accessor :responses

     def initialize(url, output, options={})
       @responses = {}

@@ -10,13 +12,17 @@
       output.sync = true
       Rawler.output = Logger.new(output)
       Rawler.username = options[:username]
       Rawler.password = options[:password]
       Rawler.wait = options[:wait]
-      Rawler.log = options[:log]
       Rawler.css = options[:css]
-      @logfile = File.new("rawler_log.txt", "w") if Rawler.log
+
+      # Using a custom logfile implies logging.
+      Rawler.logfile = options[:logfile] || DEFAULT_LOGFILE
+      Rawler.log = options[:log] || Rawler.logfile != DEFAULT_LOGFILE
+
+      @logfile = File.new(Rawler.logfile, "w") if Rawler.log
     end

     def validate
       validate_links_in_page(Rawler.url)
       @logfile.close if Rawler.log
@@ -53,12 +59,14 @@
     end

     def add_status_code(link, from_url)
       response = Rawler::Request.get(link)

-      validate_page(response['Location'], from_url) if response['Location']
       record_response(response.code, link, from_url, response['Location'])
       responses[link] = { :status => response.code.to_i }
+
+      validate_page(response['Location'], from_url) if response['Location']
+
     rescue Errno::ECONNREFUSED
       error("Connection refused - #{link} - Called from: #{from_url}")
     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, SocketError
       error("Connection problems - #{link} - Called from: #{from_url}")