lib/sunscraper.rb in sunscraper-1.1.0.beta3 vs lib/sunscraper.rb in sunscraper-1.2.0.beta1
- old
+ new
@@ -1,7 +1,7 @@
-if !defined?(RUBY_ENGINE) && RUBY_VERSION =~ /^1.8/
- raise RuntimeError, "Sunscraper does not work on Ruby MRI 1.8.x."
+if RUBY_VERSION =~ /^1.8/
+ raise RuntimeError, "Sunscraper does not work on Ruby 1.8."
end
# Sunscraper loads an HTML page in a headless browser and waits for the
# `Sunscraper.finish()` method to be called. It blocks the calling thread, but is thread-safe
# and does not acquire the GIL, and thus can be called from multiple threads simultaneously.
@@ -25,13 +25,13 @@
# Scrape inline HTML. The content is loaded without a particular base URL.
# If your application depends on a base URL being available, use {scrape_url}.
#
# @param [Integer] timeout timeout in milliseconds
- def scrape_html(html, timeout=5000)
+ def scrape_html(html, url="about:blank", timeout=5000)
scrape(timeout) do |worker, context|
- worker.load_html context, html
+ worker.load_html context, html, url
end
end
# Scrape a URL.
#
@@ -45,22 +45,22 @@
private
def scrape(timeout)
worker = load_worker
- context = worker.create
- yield worker, context
- worker.wait(context, timeout)
+ begin
+ context = worker.create
- data = worker.fetch(context)
+ yield worker, context
- if data == "!SUNSCRAPER_TIMEOUT"
- raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
- else
- data
+ if worker.wait(context, timeout)
+ worker.fetch(context)
+ else
+ raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
+ end
+ ensure
+ worker.finalize(context) if context
end
- ensure
- worker.discard(context) if context
end
def load_worker
case @worker
when :standalone
\ No newline at end of file