lib/benchmark/http/command/spider.rb in benchmark-http-0.9.0 vs lib/benchmark/http/command/spider.rb in benchmark-http-0.10.0
- old
+ new
@@ -16,17 +16,12 @@
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-require_relative '../seconds'
-require_relative '../statistics'
-require_relative '../links_filter'
+require_relative '../spider'
-require 'async'
-require 'async/http/client'
-require 'async/http/endpoint'
require 'async/await'
require 'samovar'
require 'uri'
@@ -44,100 +39,22 @@
end
many :urls, "One or more hosts to benchmark"
# Reports one request/response pair: the HTTP method, the URL, the response
# version and status, and the body length ('unspecified' when no length is
# available), followed by every response header when @options[:headers] is set.
# In this diff the removed (-) lines print with puts, while the added (+)
# lines write the same text to the buffer yielded by Async.logger.call, at
# :warn severity when response.failure? is true and :info otherwise.
# The removed extract_links and fetch methods that follow have no added (+)
# counterpart in this hunk.
def log(method, url, response)
- puts "#{method} #{url} -> #{response.version} #{response.status} (#{response.body&.length || 'unspecified'} bytes)"
-
- response.headers.each do |key, value|
- puts "\t#{key}: #{value}"
- end if @options[:headers]
- end
-
- def extract_links(url, response)
- base = url
-
- body = response.read
-
- begin
- filter = LinksFilter.parse(body)
- rescue
- Async.logger.error($!)
- return []
- end
-
- if filter.base
- base = base + filter.base
- end
-
- filter.links.collect do |href|
- next if href.nil? or href.empty?
+ Async.logger.call(self, severity: (response.failure? ? :warn : :info)) do |buffer|
+ buffer.puts "#{method} #{url} -> #{response.version} #{response.status} (#{response.body&.length || 'unspecified'} bytes)"
- begin
- full_url = base + href
-
- if full_url.host == url.host && full_url.kind_of?(URI::HTTP)
- yield full_url
- end
- rescue ArgumentError, URI::InvalidURIError
- puts "Could not fetch #{href}, relative to #{base}."
- end
- end.compact
- end
-
- async def fetch(statistics, client, url, depth = @options[:depth], fetched = Set.new)
- return if fetched.include?(url) or depth == 0
-
- fetched << url
-
- request_uri = url.request_uri
-
- response = client.head(request_uri).tap(&:read)
-
- log("HEAD", url, response)
-
- if response.redirection?
- location = url + response.headers['location']
- if location.host == url.host
- puts "Following redirect to #{location}..."
- return fetch(statistics, client, location, depth-1, fetched).wait
- else
- puts "Ignoring redirect to #{location}."
- return
- end
+ response.headers.each do |key, value|
+ buffer.puts "\t#{key}: #{value}"
+ end if @options[:headers]
end
-
- content_type = response.headers['content-type']
- unless content_type&.start_with? 'text/html'
- puts "Unsupported content type: #{content_type}"
- return
- end
-
- response = statistics.measure do
- client.get(request_uri)
- end
-
- log("GET", url, response)
-
- extract_links(url, response) do |href|
- fetch(statistics, client, href, depth - 1, fetched)
- end.each(&:wait)
- rescue Async::TimeoutError
- Async.logger.error("Timeout while fetching #{url}")
- rescue StandardError
- Async.logger.error($!)
end
# Command entry point: gathers statistics for @urls, prints them, and returns
# the statistics object.
# Removed (-) side: creates a Statistics, then for each URL parses an endpoint
# (timeout: 10), opens a client (connection_limit: 4) and waits on fetch.
# Added (+) side: builds an HTTP::Spider with depth: @options[:depth] and calls
# it with @urls, passing this object's log method as the block; the value it
# returns is used as the statistics.
# NOTE(review): `async def` / `sync def` are presumably the method wrappers
# provided by the required 'async/await' library -- confirm.
- async def call
- statistics = Statistics.new
+ sync def call
+ spider = HTTP::Spider.new(depth: @options[:depth])
- @urls.each do |url|
- endpoint = Async::HTTP::Endpoint.parse(url, timeout: 10)
-
- Async::HTTP::Client.open(endpoint, endpoint.protocol, connection_limit: 4) do |client|
- fetch(statistics, client, endpoint.url).wait
- end
- end
+ statistics = spider.call(@urls, &self.method(:log))
statistics.print
return statistics
end