lib/retriever/page.rb in rubyretriever-1.4.5 vs lib/retriever/page.rb in rubyretriever-1.4.6
- old
+ new
@@ -50,11 +50,17 @@
@links = @source.scan(HREF_CONTENTS_RE).map do |match|
# filter some malformed URLS that come in
# meant to be a loose filter to catch all reasonable HREF attributes.
link = match[0]
next if HASH_RE =~ link
- Link.new(@t.scheme, @t.host, link, @url).path
+ Link.new(@t.scheme, host_with_port, link, @url).path
end.compact.uniq
+ end
+
# Builds the host string handed to Link.new when resolving the hrefs
# scanned from the page: the bare host, or "host:port" when the target
# carries a port.
#
# NOTE(review): @t is presumably a parsed URI-like object responding to
# #host and #port -- confirm against where @t is assigned.
#
# @return [String, nil] @t.host unchanged when either the host or the port
#   is missing, otherwise the two joined as "host:port"
def host_with_port
  host = @t.host
  port = @t.port
  # With no port there is nothing to append; with no host, hand the nil
  # back to the caller rather than raising NoMethodError on nil + ':'.
  return host unless host && port

  "#{host}:#{port}"
end
def parse_internal
links.select do |x|
@t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host