lib/postrank-uri.rb in postrank-uri-1.0.13 vs lib/postrank-uri.rb in postrank-uri-1.0.14

- old
+ new

@@ -16,10 +16,26 @@
       end
       dom = dp.public_suffix
       dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
     end
+
+    def normalized_query
+      @normalized_query ||= (begin
+        if self.query && self.query.strip != ''
+          (self.query.strip.split("&", -1).map do |pair|
+            Addressable::URI.normalize_component(
+              pair,
+              Addressable::URI::CharacterClasses::QUERY.sub("\\&", "")
+            )
+          end).join("&")
+        else
+          nil
+        end
+      end)
+    end
+
   end
 end
 module PostRank
   module URI
@@ -31,11 +47,11 @@
     C18N[:hosts] = c18ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
     URIREGEX = {}
     URIREGEX[:protocol] = /https?:\/\//i
     URIREGEX[:valid_preceding_chars] = /(?:|\.|[^-\/"':!=A-Z0-9_@@]|^|\:)/i
-    URIREGEX[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
+    URIREGEX[:valid_domain] = /\b(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,63}(?::[0-9]+)?/i
     URIREGEX[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~]/i
     # Allow URL paths to contain balanced parens
     # 1. Used in Wikipedia URLs like /Primer_(film)
     # 2. Used in IIS sessions like /S(dfd346)/
@@ -201,11 +217,10 @@
           end
         end
       end
       uri.scheme = 'http' if uri.host && !uri.scheme
-
-      uri.normalize
+      uri.normalize!
     end
   end
 end