Sha256: 91849d7c1e0a00f95f7a506d9a156886e5ac930db1cbf1558a72f37de44d9566

Contents?: true

Size: 1.23 KB

Versions: 2

Compression:

Stored size: 1.23 KB

Contents

require_relative 'base'

# Namespace holding site-specific scrapers; this file defines the RemoteOk one.
module Sites
  # Scraper for the remote developer job listings on remoteok.io.
  #
  # Several helpers used below are not defined in this file: +doc+, +url+,
  # +filepath+, +open_page+, +@current_time+ and +@count+. They are presumably
  # provided by Base -- confirm there before changing how they are used.
  class RemoteOk < Base
    # Scheme and host prepended to each listing's relative path.
    HOST = 'https://remoteok.io'.freeze
    # Path of the developer-jobs listing. Not referenced in this file;
    # presumably read by Base when building +url+ -- TODO confirm.
    PROGRAMMING = '/remote-dev-jobs'.freeze
    # CSS selector matching one job row on the listing page.
    JOB_ITEM_SELECTOR = 'tr.job'.freeze
    # Directory created (if missing) before the CSV is written.
    STORE_DIR = 'data/remote_ok'.freeze

    # @param args [Hash] options forwarded unchanged to Base#initialize
    def initialize(args = {})
      # Forward the caller's hash as given. The previous `super(args = {})`
      # reassigned +args+ to an empty hash first, so every option passed to
      # RemoteOk.new was silently discarded.
      super(args)
    end

    # Visits every job row found in +doc+, fetches the job's own page and
    # appends one CSV row per job: [job_url, location, region, keywords].
    #
    # Progress is reported on stdout. The CSV at +filepath+ is opened in 'w'
    # mode, so an existing file is overwritten.
    #
    # @return [nil] the value of the final +puts+
    def collect_jobs
      puts "[Info] Getting the data from #{url} at #{@current_time}..."
      FileUtils.mkdir_p STORE_DIR

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |link|
          # Each row carries the job's relative path in its data-url attribute.
          job_url = "#{HOST}#{link['data-url']}"
          puts "[Info] Processing #{job_url}..."
          job_page = Nokogiri::HTML(open_page(job_url))
          offer_text = job_page.css('td.heading').to_s

          location = Support::OfferParser.get_location(offer_text)
          # Region is not extracted for this site; the column is kept empty.
          region   = nil
          keywords = Support::OfferParser.get_keywords(offer_text)

          csv << [job_url, location, region, keywords]
        end
      end

      puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}."
    end

    private

    # Counts the job rows present in +doc+ and reports the number on stdout.
    #
    # @return [Integer] number of elements matching JOB_ITEM_SELECTOR
    def get_count
      # The node set already knows its size; no intermediate array is needed.
      count = doc.css(JOB_ITEM_SELECTOR).size
      puts "[Info] There is #{count} remote jobs available."
      count
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
remote_job_scraper-0.2.0 lib/sites/remote_ok.rb
remote_job_scraper-0.1.0 lib/sites/remote_ok.rb