lib/relevance/tarantula/invalid_html_handler.rb in tarantula-0.4.1 vs lib/relevance/tarantula/invalid_html_handler.rb in tarantula-0.4.2
- old
+ new
@@ -3,10 +3,13 @@
class InvalidHtmlHandler
include Relevance::Tarantula
def handle(result)
response = result.response
- return unless response.html?
+ unless response.html?
+ log "Skipping #{self.class} on url: #{result.url} because response is not html."
+ return
+ end
begin
body = HTML::Document.new(response.body, true)
rescue Exception => e
error_result = result.dup
error_result.success = false