Sha256: 3a6176ef8cba5c53c473d2183d4462f7c918d372c90bf37f4b9d21a118db0663
Contents?: true
Size: 1.36 KB
Versions: 1
Compression:
Stored size: 1.36 KB
Contents
require 'sunscraper/library'

# Sunscraper renders an HTML page inside a headless browser and then waits
# until the `Sunscraper.finish()` method is called. The calling thread is
# blocked for the duration, but the call is threadsafe and does not acquire
# the GIL, so it may be invoked from several threads simultaneously.
module Sunscraper
  # Raised when the page could not be loaded fast enough.
  class ScrapeTimeout < StandardError; end

  # Scrape a piece of inline HTML. The content is loaded with no particular
  # base URL; when your application needs a base URL to be available, use
  # {scrape_url} instead.
  #
  # @param html inline HTML handed to `Library.load_html`
  # @param [Integer] timeout timeout in milliseconds
  # @return whatever `Library.fetch` produced for the page
  # @raise [ScrapeTimeout] when the fetched value is the timeout marker
  def self.scrape_html(html, timeout = 5000)
    scrape(timeout) { |handle| Library.load_html(handle, html) }
  end

  # Scrape the page found at a URL.
  #
  # @param url address handed to `Library.load_url`
  # @param [Integer] timeout timeout in milliseconds
  # @return whatever `Library.fetch` produced for the page
  # @raise [ScrapeTimeout] when the fetched value is the timeout marker
  def self.scrape_url(url, timeout = 5000)
    scrape(timeout) { |handle| Library.load_url(handle, url) }
  end

  # Shared driver behind the public scrape methods: creates a library
  # context, yields it so the caller can start loading content, waits up to
  # `timeout`, and then fetches the outcome.
  #
  # @param [Integer] timeout timeout in milliseconds
  # @yield [handle] the freshly created library context
  # @return the value obtained from `Library.fetch`
  # @raise [ScrapeTimeout] when the fetched value is the timeout marker
  def self.scrape(timeout)
    handle = Library.create
    yield handle
    Library.wait(handle, timeout)

    result = Library.fetch(handle)
    # The library reports a timeout in-band through this marker string.
    timed_out = result == "!SUNSCRAPER_TIMEOUT"
    raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback" if timed_out

    result
  ensure
    # Release the context on every exit path; `handle` is still nil when
    # `Library.create` itself raised.
    Library.discard(handle) if handle
  end
  private_class_method :scrape
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
sunscraper-1.0.0 | lib/sunscraper.rb |