lib/rawler/base.rb in rawler-0.1.5 vs lib/rawler/base.rb in rawler-0.1.6
- old
+ new
@@ -1,8 +1,10 @@
module Rawler
class Base
+ DEFAULT_LOGFILE = "rawler_log.txt"
+
attr_accessor :responses
def initialize(url, output, options={})
@responses = {}
@@ -10,13 +12,17 @@
output.sync = true
Rawler.output = Logger.new(output)
Rawler.username = options[:username]
Rawler.password = options[:password]
Rawler.wait = options[:wait]
- Rawler.log = options[:log]
Rawler.css = options[:css]
- @logfile = File.new("rawler_log.txt", "w") if Rawler.log
+
+ # A logfile name other than DEFAULT_LOGFILE turns logging on, even when the :log option is not set.
+ Rawler.logfile = options[:logfile] || DEFAULT_LOGFILE
+ Rawler.log = options[:log] || Rawler.logfile != DEFAULT_LOGFILE
+
+ @logfile = File.new(Rawler.logfile, "w") if Rawler.log
end
def validate
validate_links_in_page(Rawler.url)
@logfile.close if Rawler.log
@@ -53,12 +59,14 @@
end
def add_status_code(link, from_url)
response = Rawler::Request.get(link)
- validate_page(response['Location'], from_url) if response['Location']
record_response(response.code, link, from_url, response['Location'])
responses[link] = { :status => response.code.to_i }
+
+ validate_page(response['Location'], from_url) if response['Location']
+
rescue Errno::ECONNREFUSED
error("Connection refused - #{link} - Called from: #{from_url}")
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT,
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, SocketError
error("Connection problems - #{link} - Called from: #{from_url}")