lib/sunscraper.rb in sunscraper-1.1.0.beta3 vs lib/sunscraper.rb in sunscraper-1.2.0.beta1

- old
+ new

@@ -1,7 +1,7 @@
-if !defined?(RUBY_ENGINE) && RUBY_VERSION =~ /^1.8/
-  raise RuntimeError, "Sunscraper does not work on Ruby MRI 1.8.x."
+if RUBY_VERSION =~ /^1.8/
+  raise RuntimeError, "Sunscraper does not work on Ruby 1.8."
 end

 # Sunscraper loads an HTML page in a headless browser and waits for `Sunscraper.finish()`
 # method to be called. It blocks the calling thread, but is threadsafe, does
 # not acquire GIL and thus can be called from multiple threads simultaneously.
@@ -25,13 +25,13 @@

   # Scrape an inline HTML. The content is loaded without a particular base URL.
   # If your application depends on base URL being available, use {scrape_url}.
   #
   # @param [Integer] timeout timeout in milliseconds
-  def scrape_html(html, timeout=5000)
+  def scrape_html(html, url="about:blank", timeout=5000)
     scrape(timeout) do |worker, context|
-      worker.load_html context, html
+      worker.load_html context, html, url
     end
   end

   # Scrape an URL.
   #
@@ -45,22 +45,22 @@
   private

   def scrape(timeout)
     worker = load_worker

-    context = worker.create
-    yield worker, context
-    worker.wait(context, timeout)
+    begin
+      context = worker.create

-    data = worker.fetch(context)
+      yield worker, context

-    if data == "!SUNSCRAPER_TIMEOUT"
-      raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
-    else
-      data
+      if worker.wait(context, timeout)
+        worker.fetch(context)
+      else
+        raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
+      end
+    ensure
+      worker.finalize(context) if context
     end
-  ensure
-    worker.discard(context) if context
   end

   def load_worker
     case @worker
     when :standalone
\ No newline at end of file