Sha256: 9a9844d9e0ec5adf5409d55bf218ff3ac6ce576e06d6a206e305ba0afe698cc9
Contents?: true
Size: 1.93 KB
Versions: 2
Compression:
Stored size: 1.93 KB
Contents
require 'stringio'

module Pageflow
  module Chart
    # Job that downloads a scraped site's HTML, JavaScript, stylesheet and CSV
    # resources and assigns the resulting files to the given record.
    #
    # NOTE(review): `StateMachineJob` is defined outside this file; it
    # presumably supplies the class-level job entry point that ends up calling
    # `perform_with_result` - confirm against that module.
    class ScrapeSiteJob
      extend StateMachineJob

      # Queue name read by the job backend (presumably Resque - TODO confirm).
      @queue = :scraping

      attr_reader :downloader

      # @param downloader [#load, #load_all] object used to fetch remote
      #   resources; `perform_with_result` passes a `Downloader`.
      def initialize(downloader)
        @downloader = downloader
      end

      # Fetches the page at `scraped_site.url`, runs it through `Scraper` and
      # assigns the processed HTML, the downloaded JavaScript and stylesheet
      # files, and finally the CSV file to `scraped_site`.
      #
      # @param scraped_site [#url, #csv_url, #html_file=, #javascript_file=,
      #   #stylesheet_file=, #csv_file=] record that receives the files.
      # @return [Symbol] always `:ok` when no exception was raised.
      def perform(scraped_site)
        downloader.load(scraped_site.url) do |file|
          scraper = Scraper.new(file.read, Chart.config.scraper_options)

          # Wrap the HTML string so it exposes `content_type` and
          # `original_filename` like an uploaded file would.
          scraped_site.html_file = StringIOWithContentType.new(
            scraper.html,
            file_name: 'file.html',
            content_type: 'text/html'
          )

          # Each script is surrounded by the try/catch fragments below,
          # presumably so that one failing script does not abort the ones
          # that follow it in the combined file - TODO confirm in Downloader.
          downloader.load_all(scraper.javascript_urls,
                              extension: '.js',
                              before_each: begin_try_catch,
                              after_each: end_try_catch) do |javascript_file|
            scraped_site.javascript_file = javascript_file
          end

          downloader.load_all(scraper.stylesheet_urls,
                              extension: '.css',
                              separator: "\n;") do |stylesheet_file|
            scraped_site.stylesheet_file = stylesheet_file
          end
        end

        downloader.load(scraped_site.csv_url) do |file|
          scraped_site.csv_file = file
        end

        :ok
      end

      # Class-level entry point: builds a job instance and runs it.
      #
      # @param scraped_site [#url] record to scrape; its `url` becomes the
      #   downloader's `base_url`.
      # @param options [Hash] accepted but not used by this method.
      # @return [Symbol] the result of `#perform`.
      def self.perform_with_result(scraped_site, options = {})
        # This is where the downloader passed to `initialize` is created.
        new(Downloader.new(base_url: scraped_site.url)).perform(scraped_site)
      end

      # JavaScript fragment passed as `before_each` when loading scripts.
      # The leading `;` presumably terminates a preceding statement that
      # lacks its own semicolon.
      #
      # @return [String]
      def begin_try_catch
        ";try {\n"
      end

      # JavaScript fragment passed as `after_each` when loading scripts;
      # closes the `try` block and logs the caught error to the console.
      #
      # @return [String]
      def end_try_catch
        "\n} catch(e) { console.log('Datawrapper raised: ' + (e.message || e)); }\n"
      end
    end

    # StringIO that additionally answers `content_type` and
    # `original_filename` from an options hash.
    class StringIOWithContentType < StringIO
      # @param string [String] content of the IO.
      # @param options [Hash] must contain `:content_type` and `:file_name`
      #   for the respective readers to succeed.
      def initialize(string, options)
        super(string)
        @options = options
      end

      # @return [Object] the `:content_type` option.
      # @raise [KeyError] if the option was not given.
      def content_type
        @options.fetch(:content_type)
      end

      # @return [Object] the `:file_name` option.
      # @raise [KeyError] if the option was not given.
      def original_filename
        @options.fetch(:file_name)
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
pageflow-chart-0.2.1 | app/jobs/pageflow/chart/scrape_site_job.rb |
pageflow-chart-0.2.0 | app/jobs/pageflow/chart/scrape_site_job.rb |