Sha256: ce5a048244ff881c10a77cfe4f597afe9c4d660c51ec7ef0c4a763e01ab6abfd
Contents?: true
Size: 1.53 KB
Versions: 2
Compression:
Stored size: 1.53 KB
Contents
require 'stringio'

module Pageflow
  module Chart
    # Downloads the HTML page of a scraped site, the JavaScript and stylesheet
    # files the scraper finds in that page, and the site's CSV data, assigning
    # each download to the matching file attribute of the scraped site.
    class ScrapeSiteJob
      # NOTE(review): StateMachineJob is defined outside this file; it
      # presumably provides the class-level hook that ends up calling
      # `perform_with_result` -- confirm against its definition.
      extend StateMachineJob

      # NOTE(review): looks like a Resque-style queue name -- confirm.
      @queue = :scraping

      attr_reader :downloader

      # @param downloader [#load, #load_all] object used to fetch remote files;
      #   both methods are called with a block that receives the fetched file.
      def initialize(downloader)
        @downloader = downloader
      end

      # Fetches every file belonging to the site and stores it on
      # `scraped_site`.
      #
      # @param scraped_site object responding to `url`, `csv_url` and the
      #   writers `html_file=`, `javascript_file=`, `stylesheet_file=` and
      #   `csv_file=`.
      # @return [Symbol] `:ok` once all download calls have returned.
      def perform(scraped_site)
        downloader.load(scraped_site.url) do |page_file|
          scraper = Scraper.new(page_file.read, Chart.config.scraper_options)

          # The scraper's HTML is a plain string; wrap it so it exposes the
          # `content_type` and `original_filename` readers defined below.
          scraped_site.html_file = StringIOWithContentType.new(
            scraper.html,
            file_name: 'file.html',
            content_type: 'text/html'
          )

          # Distinct block parameter names avoid shadowing `page_file`.
          downloader.load_all(scraper.javascript_urls,
                              extension: '.js',
                              separator: "\n;") do |javascript_file|
            scraped_site.javascript_file = javascript_file
          end

          # NOTE(review): the same "\n;" separator as for scripts is passed for
          # stylesheets. A bare `;` between concatenated CSS files may affect
          # how the following rule is parsed -- confirm this is intended.
          downloader.load_all(scraper.stylesheet_urls,
                              extension: '.css',
                              separator: "\n;") do |stylesheet_file|
            scraped_site.stylesheet_file = stylesheet_file
          end
        end

        downloader.load(scraped_site.csv_url) do |csv_file|
          scraped_site.csv_file = csv_file
        end

        :ok
      end

      # Builds a job with a downloader rooted at the site's URL and runs it.
      #
      # @param scraped_site see {#perform}
      # @param options [Hash] accepted but not used by this method.
      # @return [Symbol] the result of {#perform}.
      def self.perform_with_result(scraped_site, options = {})
        # This is where the downloader passed to `initialize` is created.
        new(Downloader.new(base_url: scraped_site.url)).perform(scraped_site)
      end
    end

    # StringIO that additionally reports a content type and an original file
    # name taken from the options hash given at construction.
    class StringIOWithContentType < StringIO
      # @param string [String] content of the in-memory file
      # @param options [Hash] must contain `:content_type` and `:file_name`
      #   for the corresponding readers to succeed.
      def initialize(string, options)
        super(string)
        @options = options
      end

      # @return the `:content_type` option
      # @raise [KeyError] if the option was not supplied
      def content_type
        @options.fetch(:content_type)
      end

      # @return the `:file_name` option
      # @raise [KeyError] if the option was not supplied
      def original_filename
        @options.fetch(:file_name)
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
pageflow-chart-0.1.1 | app/jobs/pageflow/chart/scrape_site_job.rb |
pageflow-chart-0.1.0 | app/jobs/pageflow/chart/scrape_site_job.rb |