lib/postrank-uri.rb in postrank-uri-1.0.17 vs lib/postrank-uri.rb in postrank-uri-1.0.18

- old
+ new

@@ -1,6 +1,5 @@
-# -*- encoding: utf-8 -*-
 require 'addressable/uri'
 require 'digest/md5'
 require 'nokogiri'
 require 'public_suffix'
@@ -32,15 +31,15 @@
 end
 module PostRank
   module URI
-    c18ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c18n.yml')
+    c14ndb = YAML.load_file(File.dirname(__FILE__) + '/postrank-uri/c14n.yml')
-    C18N = {}
-    C18N[:global] = c18ndb[:all].freeze
-    C18N[:hosts] = c18ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
+    C14N = {}
+    C14N[:global] = c14ndb[:all].freeze
+    C14N[:hosts] = c14ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
     URIREGEX = {}
     URIREGEX[:protocol] = /https?:\/\//i
     URIREGEX[:valid_preceding_chars] = /(?:|\.|[^-\/"':!=A-Z0-9_@@]|^|\:)/i
     URIREGEX[:valid_domain] = /\b(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,63}(?::[0-9]+)?/i
@@ -128,17 +127,19 @@
         '%' + $1.unpack('H2' * $1.size).join('%').upcase
       end.gsub(' ','%20')
     end
     def unescape(uri)
-      uri.tr('+', ' ').gsub(URIREGEX[:unescape]) do
+      u = parse(uri)
+      u.query = u.query.tr('+', ' ') if u.query
+      u.to_s.gsub(URIREGEX[:unescape]) do
         [$1.delete('%')].pack('H*')
       end
     end
     def clean(uri, opts = {})
-      uri = normalize(c18n(unescape(uri), opts))
+      uri = normalize(c14n(unescape(uri), opts))
       opts[:raw] ? uri : uri.to_s
     end
     def hash(uri, opts = {})
       Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
@@ -151,17 +152,17 @@
       u.query = nil if u.query && u.query.empty?
       u.fragment = nil
       u
     end
-    def c18n(uri, opts = {})
+    def c14n(uri, opts = {})
       u = parse(uri, opts)
       u = embedded(u)
       if q = u.query_values(Array)
-        q.delete_if { |k,v| C18N[:global].include?(k) }
-        q.delete_if { |k,v| C18N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
+        q.delete_if { |k,v| C14N[:global].include?(k) }
+        q.delete_if { |k,v| C14N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
       end
       u.query_values = q
       if u.host =~ /^(mobile\.)?twitter\.com$/ && u.fragment && u.fragment.match(/^!(.*)/)
         u.fragment = nil
@@ -222,10 +223,10 @@
       is_valid = false
       cleaned_uri = clean(uri, :raw => true)
       if host = cleaned_uri.host
-        is_valid = PublicSuffix.valid?(host)
+        is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host))
       end
       is_valid
     end
   end
 end