lib/postrank-uri.rb in postrank-uri-1.0.13 vs lib/postrank-uri.rb in postrank-uri-1.0.14
- old
+ new
@@ -16,10 +16,26 @@
end
dom = dp.public_suffix
dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
end
+
# Returns the query string with every "&"-separated pair run through
# Addressable::URI.normalize_component and re-joined with "&".
# Returns nil when the query is nil or is blank after stripping whitespace.
# NOTE(review): the enclosing class is outside this hunk; this looks like an
# override added to Addressable::URI - confirm against the full file.
+ def normalized_query
# Memoized in @normalized_query. Because `||=` is used, a nil result is not
# cached: the blank-query branch is re-evaluated on every call.
+ @normalized_query ||= (begin
+ if self.query && self.query.strip != ''
# A negative split limit keeps trailing empty fields, so a trailing "&"
# survives the split/join round trip.
+ (self.query.strip.split("&", -1).map do |pair|
+ Addressable::URI.normalize_component(
+ pair,
# Removes the first literal `\&` sequence from the QUERY character-class
# string before using it as the set passed to normalize_component.
# Presumably this takes "&" out of the allowed set for a single pair -
# TODO confirm against Addressable's CharacterClasses::QUERY definition.
+ Addressable::URI::CharacterClasses::QUERY.sub("\\&", "")
+ )
+ end).join("&")
+ else
+ nil
+ end
+ end)
+ end
+
end
end
module PostRank
module URI
@@ -31,11 +47,11 @@
# Rebuilds c18ndb[:hosts] as a hash keyed by frozen regexes: each original key
# is regexp-escaped and anchored with `$`, and maps to its original value.
C18N[:hosts] = c18ndb[:hosts].inject({}) {|h,(k,v)| h[/#{Regexp.escape(k)}$/.freeze] = v; h}
# Table of regex fragments, keyed by symbol, describing parts of a URI.
URIREGEX = {}
# Matches "http://" or "https://", case-insensitively.
URIREGEX[:protocol] = /https?:\/\//i
URIREGEX[:valid_preceding_chars] = /(?:|\.|[^-\/"':!=A-Z0-9_@@]|^|\:)/i
# :valid_domain changes in this hunk.
# Old form: one or more non-punctuation, non-whitespace characters (with "."
# or "-" allowed between them), then "." and a TLD of 2+ letters, optional port.
# New form: starts at a word boundary; one or more dot-terminated labels of
# 1-63 characters from [a-z0-9-], then a TLD of 2-63 letters, optional port.
# NOTE(review): the 63 limits presumably mirror the DNS label length limit - confirm.
- URIREGEX[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
+ URIREGEX[:valid_domain] = /\b(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,63}(?::[0-9]+)?/i
# Single-character class used for URL path characters, case-insensitive.
URIREGEX[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~]/i
# Allow URL paths to contain balanced parens
# 1. Used in Wikipedia URLs like /Primer_(film)
# 2. Used in IIS sessions like /S(dfd346)/
@@ -201,11 +217,10 @@
end
end
end
uri.scheme = 'http' if uri.host && !uri.scheme
-
- uri.normalize
+ uri.normalize!
end
end
end