lib/sunscraper.rb in sunscraper-1.0.0 vs lib/sunscraper.rb in sunscraper-1.1.0.beta1

- old
+ new

require 'rbconfig'

# MRI 1.8 does not define RUBY_ENGINE, so its absence combined with a 1.8.x
# version string identifies the one interpreter Sunscraper refuses to run on.
if !defined?(RUBY_ENGINE) && RUBY_VERSION =~ /\A1\.8/
  raise RuntimeError, "Sunscraper does not work on Ruby MRI 1.8.x."
end

# Sunscraper loads an HTML page in a headless browser and waits for the
# `Sunscraper.finish()` method to be called. It blocks the calling thread, but
# is threadsafe, does not acquire GIL and thus can be called from multiple
# threads simultaneously.
module Sunscraper
  # ScrapeTimeout error is raised when the page could not be loaded fast enough.
  class ScrapeTimeout < StandardError; end

  class << self
    # Tells whether the current platform is OS X (Darwin).
    #
    # RUBY_PLATFORM is checked first; RbConfig's `target_os` is the fallback.
    # `target_os` usually carries a version suffix (e.g. "darwin22"), so it is
    # matched with a regexp rather than compared for equality.
    #
    # @return [Boolean]
    def os_x?
      darwin = RUBY_PLATFORM =~ /darwin/i ||
               RbConfig::CONFIG['target_os'] =~ /darwin/i

      darwin ? true : false
    end

    # @return [Symbol] the currently selected worker type
    attr_reader :worker

    # Select the worker implementation used by subsequent scrapes.
    #
    # @param [Symbol] worker_type either `:embed` or `:standalone`
    # @raise [RuntimeError] if the worker type is not one of the above
    def worker=(worker_type)
      unless [:embed, :standalone].include?(worker_type)
        raise RuntimeError, "Invalid Sunscraper worker type: #{worker_type.inspect}"
      end

      @worker = worker_type
    end

    # Scrape inline HTML. The content is loaded without a particular base URL.
    # If your application depends on base URL being available, use {scrape_url}.
    #
    # @param [String] html the markup to load
    # @param [Integer] timeout timeout in milliseconds
    # @return the data fetched from the worker once scraping has finished
    # @raise [ScrapeTimeout] if the worker reports a timeout
    def scrape_html(html, timeout=5000)
      scrape(timeout) do |worker, context|
        worker.load_html context, html
      end
    end

    # Scrape a URL.
    #
    # @param [String] url the address to load
    # @param [Integer] timeout timeout in milliseconds
    # @return the data fetched from the worker once scraping has finished
    # @raise [ScrapeTimeout] if the worker reports a timeout
    def scrape_url(url, timeout=5000)
      scrape(timeout) do |worker, context|
        worker.load_url context, url
      end
    end

    private

    # Common scraping sequence: create a context, let the caller's block start
    # loading content into it, wait, fetch the result, and always discard the
    # context afterwards.
    #
    # @param [Integer] timeout timeout in milliseconds, passed to the worker
    # @yield [worker, context] the worker module and the freshly created context
    # @raise [ScrapeTimeout] if the fetched data equals the timeout marker
    def scrape(timeout)
      worker  = load_worker
      context = worker.create

      yield worker, context

      worker.wait(context, timeout)
      data = worker.fetch(context)

      # The worker signals a timeout in-band with this marker string.
      if data == "!SUNSCRAPER_TIMEOUT"
        raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
      end

      data
    ensure
      # `context` is still nil if the worker could not be loaded or created.
      worker.discard(context) if context
    end

    # Lazily require and return the module implementing the selected worker.
    # The require is deliberately deferred so only the chosen backend is loaded.
    #
    # @return [Module] Sunscraper::Standalone or Sunscraper::Library
    # @raise [RuntimeError] if no valid worker type has been selected
    def load_worker
      case @worker
      when :standalone
        require 'sunscraper/standalone'

        Sunscraper::Standalone
      when :embed
        require 'sunscraper/library'

        Sunscraper::Library
      else
        # Fail here with a clear message instead of returning nil and letting
        # the caller crash with NoMethodError.
        raise RuntimeError, "Sunscraper worker type is not set: #{@worker.inspect}"
      end
    end
  end
end

# Default worker per platform: the standalone worker on OS X, the embedded
# library everywhere else.
# NOTE(review): presumably the embedded worker does not work on OS X -- confirm.
Sunscraper.worker = Sunscraper.os_x? ? :standalone : :embed