lib/retriever/page.rb in rubyretriever-1.4.5 vs lib/retriever/page.rb in rubyretriever-1.4.6

- old
+ new

@@ -50,11 +50,17 @@
   @links = @source.scan(HREF_CONTENTS_RE).map do |match|
     # filter some malformed URLS that come in
     # meant to be a loose filter to catch all reasonable HREF attributes.
     link = match[0]
     next if HASH_RE =~ link
-    Link.new(@t.scheme, @t.host, link, @url).path
+    Link.new(@t.scheme, host_with_port, link, @url).path
   end.compact.uniq
+ end
+
+ def host_with_port
+   return @t.host if @t.port.nil?
+
+   @t.host + ':' + @t.port.to_s
  end
  def parse_internal
    links.select do |x|
      @t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host