lib/postrank-uri.rb in postrank-uri-1.0.16 vs lib/postrank-uri.rb in postrank-uri-1.0.17
- old
+ new
@@ -1,24 +1,18 @@
# -*- encoding: utf-8 -*-
require 'addressable/uri'
-require 'domainatrix'
require 'digest/md5'
require 'nokogiri'
+require 'public_suffix'
require 'yaml'
module Addressable
class URI
# Returns a domain string derived from this URI, or nil.
#
# Old side (-): parsed the whole URI with Domainatrix and returned nil if
# parsing raised; otherwise started from dp.public_suffix and prefixed it
# with the lowercased dp.domain when that was non-empty.
# New side (+): reads self.host and, when it is present and
# PublicSuffix.valid?(host) accepts it, returns PublicSuffix.parse(host).domain;
# in every other case the result is nil.
def domain
- begin
- dp = Domainatrix.parse(self)
- rescue
- return nil
- end
-
- dom = dp.public_suffix
- dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
+ host = self.host
# The host nil-check short-circuits before PublicSuffix is consulted.
+ (host && PublicSuffix.valid?(host)) ? PublicSuffix.parse(host).domain : nil
end
def normalized_query
@normalized_query ||= (begin
if self.query && self.query.strip != ''
@@ -101,15 +95,14 @@
# Scans text for matches of URIREGEX[:valid_url] and returns the accepted
# matches, each passed through clean and converted with to_s.
# Returns [] immediately when text is nil or false.
#
# Old side (-): ran clean and Domainatrix.parse inside a begin block and
# skipped the match when a NoMethodError was raised there.
# New side (+): keeps a match only when PublicSuffix.valid?(domain) accepts
# the domain capture group yielded by the scan.
def extract(text)
return [] if !text
urls = []
# NOTE(review): the capture groups (including domain) come from URIREGEX,
# which is defined outside this hunk — confirm domain is always a String here.
text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
- begin
+ # Only extract the URL if the domain is valid
+ if PublicSuffix.valid?(domain)
url = clean(url)
- Domainatrix.parse(url)
urls.push url.to_s
- rescue NoMethodError
end
end
urls.compact
end
@@ -221,12 +214,20 @@
uri.scheme = 'http' if uri.host && !uri.scheme
uri.normalize!
end
# Reports whether uri is considered valid.
#
# Old side (-): returned true when Domainatrix.parse(uri) completed and
# false when it raised (bare rescue).
# New side (+): returns false for a nil uri; otherwise calls
# clean(uri, :raw => true) and returns PublicSuffix.valid?(host) for the
# resulting host, or false when the cleaned URI has no host.
#
# NOTE(review): the new side has no rescue, so any exception raised by
# clean would now propagate to the caller instead of yielding false —
# confirm this is intended.
def valid?(uri)
- Domainatrix.parse(uri)
- true
- rescue
- false
+ # URI is only valid if it is not nil, parses cleanly as a URI,
+ # and the domain has a recognized, valid TLD component
+ return false if uri.nil?
+
+ is_valid = false
+ cleaned_uri = clean(uri, :raw => true)
+
# Assignment inside the condition is deliberate: the body runs only when
# the cleaned URI exposes a non-nil host.
+ if host = cleaned_uri.host
+ is_valid = PublicSuffix.valid?(host)
+ end
+
+ is_valid
end
end
end