lib/postrank-uri.rb in postrank-uri-1.0.3 vs lib/postrank-uri.rb in postrank-uri-1.0.4
- old
+ new
@@ -70,13 +70,13 @@
def extract(text)
return [] if !text
urls = []
text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
begin
- url = clean(url).to_s
+ url = clean(url)
Domainatrix.parse(url)
- urls.push url
+ urls.push url.to_s
rescue NoMethodError
end
end
urls.compact
@@ -84,11 +84,11 @@
def extract_href(text, host = nil)
urls = []
Nokogiri.HTML(text).search('a').each do |a|
begin
- url = normalize(c18n(unescape(a.attr('href'))))
+ url = clean(a.attr('href'), false)
if url.host.empty?
next if host.nil?
url.host = host
end
@@ -110,11 +110,12 @@
uri.tr('+', ' ').gsub(URIREGEX[:unescape]) do
[$1.delete('%')].pack('H*')
end
end
# clean(uri, string = true)
#
# Runs the URI through the pipeline unescape -> c18n -> normalize.
#
# Old version (the "-" lines): always called .to_s on the normalized result.
# New version (the "+" lines): takes an optional second argument, `string`,
# which defaults to true so existing one-argument callers still get .to_s
# output. When `string` is false, the value returned by normalize is handed
# back without calling .to_s on it; extract_href in this diff relies on that,
# calling clean(href, false) and then reading/assigning url.host.
#
# NOTE(review): the type normalize returns is not visible here; it is
# presumably a parsed URI object (extract_href uses #host on it) -- confirm.
- def clean(uri)
- normalize(c18n(unescape(uri))).to_s
+ def clean(uri, string = true)
+ uri = normalize(c18n(unescape(uri)))
+ string ? uri.to_s : uri
end
def normalize(uri)
u = parse(uri)
u.path = u.path.squeeze('/')
\ No newline at end of file