lib/postrank-uri.rb in postrank-uri-1.0.12 vs lib/postrank-uri.rb in postrank-uri-1.0.13

- old
+ new

@@ -100,16 +100,14 @@
     def extract_href(text, host = nil)
       urls = []
       Nokogiri.HTML(text).search('a').each do |a|
         begin
-          url = clean(a.attr('href'), :raw => true)
-          if url.host.empty?
-            next if host.nil?
-            url.host = host
-          end
+          url = clean(a.attr('href'), :raw => true, :host => host)
+          next unless url.absolute?
+
           urls.push [url.to_s, a.text]
         rescue
           next
         end
       end
@@ -127,29 +125,29 @@
         [$1.delete('%')].pack('H*')
       end
     end
     def clean(uri, opts = {})
-      uri = normalize(c18n(unescape(uri)))
+      uri = normalize(c18n(unescape(uri), opts))
       opts[:raw] ? uri : uri.to_s
     end
     def hash(uri, opts = {})
       Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
     end
-    def normalize(uri)
-      u = parse(uri)
+    def normalize(uri, opts = {})
+      u = parse(uri, opts)
       u.path = u.path.squeeze('/')
       u.path = u.path.chomp('/') if u.path.size != 1
       u.query = nil if u.query && u.query.empty?
       u.fragment = nil
       u
     end
-    def c18n(uri)
-      u = parse(uri)
+    def c18n(uri, opts = {})
+      u = parse(uri, opts)
       u = embedded(u)
       if q = u.query_values(:notation => :flat_array)
         q.delete_if { |k,v| C18N[:global].include?(k) }
         q.delete_if { |k,v| C18N[:hosts].find {|r,p| u.host =~ r && p.include?(k) } }
@@ -179,14 +177,36 @@
       uri = clean(embedded, :raw => true) if embedded
       uri
     end
-    def parse(uri)
+    def parse(uri, opts = {})
       return uri if uri.is_a? Addressable::URI
-      uri = uri.index(URIREGEX[:protocol]) == 0 ? uri : "http://#{uri}"
-      Addressable::URI.parse(uri).normalize
+      uri = Addressable::URI.parse(uri)
+
+      unless uri.host
+        if uri.scheme
+          # With no host and scheme yes, the parser exploded
+          return parse("http://#{uri}", opts)
+        end
+
+        if opts[:host]
+          uri.host = opts[:host]
+        else
+          parts = uri.path.to_s.split(/[\/:]/)
+          if parts.first =~ URIREGEX[:valid_domain]
+            host = parts.shift
+            uri.path = '/' + parts.join('/')
+            uri.host = host
+          end
+        end
+      end
+
+      uri.scheme = 'http' if uri.host && !uri.scheme
+
+      uri.normalize
     end
   end
-end
\ No newline at end of file
+end
+