lib/postrank-uri.rb in postrank-uri-1.0.8 vs lib/postrank-uri.rb in postrank-uri-1.0.9
- old
+ new
@@ -4,10 +4,25 @@
require 'domainatrix'
require 'digest/md5'
require 'nokogiri'
require 'yaml'
+module Addressable
+ class URI
+ def domain
+ begin
+ dp = Domainatrix.parse(self)
+ rescue
+ return nil
+ end
+
+ dom = dp.public_suffix
+ dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
+ end
+ end
+end
+
module PostRank
module URI
c18ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c18n.yml')
@@ -85,11 +100,11 @@
def extract_href(text, host = nil)
urls = []
Nokogiri.HTML(text).search('a').each do |a|
begin
- url = clean(a.attr('href'), false)
+ url = clean(a.attr('href'), :raw => true)
if url.host.empty?
next if host.nil?
url.host = host
end
@@ -111,17 +126,17 @@
uri.tr('+', ' ').gsub(URIREGEX[:unescape]) do
[$1.delete('%')].pack('H*')
end
end
# Runs uri through unescape, then c18n, then normalize, and returns the result.
#
# Old signature (removed): a positional `string` flag, default true; a truthy
# value returned uri.to_s, a falsy value returned the normalized object itself.
# New signature (added): an options hash; a truthy opts[:raw] returns the
# normalized object, otherwise its to_s form is returned. Calling clean(uri)
# with no second argument yields a string in both versions.
- def clean(uri, string = true)
+ def clean(uri, opts = {})
uri = normalize(c18n(unescape(uri)))
- string ? uri.to_s : uri
+ opts[:raw] ? uri : uri.to_s
end
# Returns the MD5 hex digest of a URI.
#
# Old version (removed): always digests clean(uri).
# New version (added): accepts an options hash; a truthy opts[:skip_clean]
# digests uri exactly as given, otherwise clean(uri) is digested as before.
- def hash(uri)
- Digest::MD5.hexdigest(clean(uri))
+ def hash(uri, opts = {})
+ Digest::MD5.hexdigest(opts[:skip_clean] ? uri : clean(uri))
end
def normalize(uri)
u = parse(uri)
u.path = u.path.squeeze('/')
@@ -156,10 +171,10 @@
elsif uri.host =~ /myspace\.com/ && uri.path =~ /PostTo/
embedded = uri.query_values['u']
end
- uri = clean(embedded, false) if embedded
+ uri = clean(embedded, :raw => true) if embedded
uri
end
def parse(uri)
return uri if uri.is_a? Addressable::URI
\ No newline at end of file