lib/relevance/tarantula/invalid_html_handler.rb in tarantula-0.4.1 vs lib/relevance/tarantula/invalid_html_handler.rb in tarantula-0.4.2

- old
+ new

@@ -3,10 +3,13 @@
 class InvalidHtmlHandler
   include Relevance::Tarantula
   def handle(result)
     response = result.response
-    return unless response.html?
+    unless response.html?
+      log "Skipping #{self.class} on url: #{result.url} because response is not html."
+      return
+    end
     begin
       body = HTML::Document.new(response.body, true)
     rescue Exception => e
       error_result = result.dup
       error_result.success = false