lib/wmap/utils/url_magic.rb in wmap-2.7.6 vs lib/wmap/utils/url_magic.rb in wmap-2.7.7

- old
+ new

# Test the URL and return the response headers.
#
# Issues a single GET request against +url+ and collects every response header.
#
# @param url [String] absolute http:// or https:// URL; surrounding whitespace is ignored
# @return [Hash{String=>String}, nil] header name => value pairs as yielded by
#   Net::HTTPResponse#each_header (names are lower-cased by Net::HTTP), or nil when
#   is_url? rejects the URL or anything raises while performing the request
def response_headers(url)
  puts "Check the http response headers on the url: #{url}" if @verbose
  raise "Invalid url: #{url}" unless is_url?(url)
  headers = {}
  # Only trim whitespace. The path and query of a URL are case-sensitive, so
  # lower-casing the whole string could make us request a different resource.
  url = url.strip
  # Net::HTTP timeouts are expressed in seconds; Max_http_timeout is divided by
  # 1000 here, so it is presumably configured in milliseconds.
  timeo = Max_http_timeout / 1000.0
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.open_timeout = timeo
  http.read_timeout = timeo
  # Decide on TLS from the parsed scheme only; matching "https:" anywhere in the
  # string would also fire on an http URL that merely carries it in the query.
  if uri.scheme.to_s.downcase == 'https'
    http.use_ssl = true
    # Bypass the remote web server cert validation test
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
  request = Net::HTTP::Get.new(uri.request_uri)
  response = http.request(request)
  puts "Server response the following: #{response}" if @verbose
  response.each_header do |key, val|
    puts "#{key} => #{val}" if @verbose
    headers[key] = val
  end
  puts "Response headers on #{url}: #{headers}" if @verbose
  return headers
rescue => ee
  # Best-effort by design: any failure (invalid URL, DNS, timeout, TLS) yields nil.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end
args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, read_timeout: Max_http_timeout/1000} doc = Nokogiri::HTML(open(url, args))