Sha256: 7554cdd869b1a7b76c96822da9938385291a09e380eab103d6975c80cba7e885
Contents?: true
Size: 1.74 KB
Versions: 3
Compression:
Stored size: 1.74 KB
Contents
require_relative 'base'

# NOTE: CSV, FileUtils, Nokogiri and Support::OfferParser are used below but not
# required here; they are expected to be loaded elsewhere in the gem (e.g. by the
# base class or the gem's entry point).
module Sites
  # Scrapes remote job offers from WeWorkRemotely and stores them as CSV rows.
  class WeWorkRemotely < Base
    HOST = 'https://weworkremotely.com'.freeze
    PATH = '/categories/remote-programming-jobs'.freeze
    DEVOPS = '/categories/remote-devops-sysadmin-jobs'.freeze
    JOB_ITEM_SELECTOR = '.jobs-container li a'.freeze
    STORE_DIR = 'data/we_work_remotely'

    def initialize
      super
    end

    def collect_jobs(limit: nil)
      puts "[Info] Getting the data from #{url}"
      FileUtils.mkdir_p STORE_DIR

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |link|
          if link["href"].start_with?("/remote-jobs")
            # Stop early once the optional limit has been reached.
            return if limit == @rows_count

            job_url = "#{HOST}#{link["href"]}"
            puts "[Info] Parsing #{job_url}..."
            csv << get_row(job_url)
            @rows_count += 1
          end
        end
      end

      puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
    end

    private

    # Builds a single CSV row from an individual job offer page.
    def get_row(job_url)
      job_page = Nokogiri::HTML(open_page(job_url))
      offer_text = job_page.css('.listing-container').to_s
      region = job_page.css('.listing-header-container span.region').first
      location = job_page.css('.listing-header-container span.location').first
      keywords = Support::OfferParser.get_keywords(offer_text)
      company = job_page.css('.listing-header-container span.company').first

      [job_url, location, region, keywords, company]
    end

    # Counts the links on the listing page that point to individual job offers.
    def get_jobs_count
      jobs_count = doc.css(JOB_ITEM_SELECTOR)
                      .map { |link| link['href'] }
                      .select { |href| href.start_with?('/remote-jobs') }
                      .size

      puts "[Info] There are #{jobs_count} remote jobs on [WeWorkRemotely]."
      jobs_count
    end
  end
end
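A minimal usage sketch, assuming the gem's entry point (here guessed as require 'remote_job_scraper') loads the dependencies and that Sites::Base provides the url, doc, filepath and open_page helpers (plus the @rows_count counter) referenced above:

require 'remote_job_scraper'  # assumed entry point; requiring 'sites/we_work_remotely' directly may also work

# Collect at most 10 offers from the WeWorkRemotely programming category
# and write them to data/we_work_remotely/<filepath>.
scraper = Sites::WeWorkRemotely.new
scraper.collect_jobs(limit: 10)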
Version data entries
3 entries across 3 versions & 1 rubygem
| Version | Path |
|---|---|
| remote_job_scraper-0.6.0 | lib/sites/we_work_remotely.rb |
| remote_job_scraper-0.5.0 | lib/sites/we_work_remotely.rb |
| remote_job_scraper-0.4.4 | lib/sites/we_work_remotely.rb |