lib/tansaku/crawler.rb in tansaku-0.1.0 vs lib/tansaku/crawler.rb in tansaku-0.1.1
- old
+ new
@@ -1,10 +1,10 @@
# frozen_string_literal: true
require "cgi"
require "net/http"
-require "thread/pool"
+require "parallel"
require "uri"
module Tansaku
class Crawler
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393"
@@ -29,19 +29,13 @@
res = get(url)
[200, 401, 302].include? res.code.to_i
end
def crawl
- pool = Thread.pool(threads)
- results = []
- urls.each do |url|
- pool.process do
- results << url if online?(url)
- end
+ results = Parallel.map(urls, in_threads: threads) do |url|
+ url if online?(url)
end
- pool.shutdown
-
- results
+ results.compact
end
private
def valid_uri?