lib/postrank-uri.rb in postrank-uri-1.0.8 vs lib/postrank-uri.rb in postrank-uri-1.0.9

- old
+ new

@@ -4,10 +4,25 @@
 require 'domainatrix'
 require 'digest/md5'
 require 'nokogiri'
 require 'yaml'
 
+module Addressable
+  class URI
+    def domain
+      begin
+        dp = Domainatrix.parse(self)
+      rescue
+        return nil
+      end
+
+      dom = dp.public_suffix
+      dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
+    end
+  end
+end
+
 module PostRank
   module URI
 
     c18ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c18n.yml')
 
@@ -85,11 +100,11 @@
 
     def extract_href(text, host = nil)
       urls = []
       Nokogiri.HTML(text).search('a').each do |a|
         begin
-          url = clean(a.attr('href'), false)
+          url = clean(a.attr('href'), :raw => true)
 
           if url.host.empty?
             next if host.nil?
             url.host = host
           end
@@ -111,17 +126,17 @@
       uri.tr('+', ' ').gsub(URIREGEX[:unescape]) do
         [$1.delete('%')].pack('H*')
       end
     end
 
-    def clean(uri, string = true)
+    def clean(uri, opts = {})
       uri = normalize(c18n(unescape(uri)))
-      string ? uri.to_s : uri
+      opts[:raw] ? uri : uri.to_s
     end
 
-    def hash(uri)
-      Digest::MD5.hexdigest(clean(uri))
+    def hash(uri, opts = {})
+      Digest::MD5.hexdigest(opts[:skip_clean] ? uri : clean(uri))
     end
 
     def normalize(uri)
       u = parse(uri)
       u.path = u.path.squeeze('/')
@@ -156,10 +171,10 @@
       elsif uri.host =~ /myspace\.com/ && uri.path =~ /PostTo/
         embedded = uri.query_values['u']
       end
 
-      uri = clean(embedded, false) if embedded
+      uri = clean(embedded, :raw => true) if embedded
       uri
     end
 
     def parse(uri)
       return uri if uri.is_a? Addressable::URI
\ No newline at end of file