Sha256: 7c6e41a66df9b4966ead16344fba456400dd47f972becb6119e11d596e7d4825

Contents?: true

Size: 1.35 KB

Versions: 3

Compression:

Stored size: 1.35 KB

Contents

require_relative 'base'

module Sites
  # Scraper for the remote developer job listings published on RemoteOK.
  #
  # The class relies on several members it does not define itself: `url`,
  # `doc`, `filepath`, `open_page` and the `@rows_count` counter.
  # NOTE(review): these are presumably supplied by Base (with `@rows_count`
  # starting at 0) -- confirm against base.rb.
  class RemoteOk < Base
    HOST = 'https://remoteok.io'.freeze
    PATH = '/remote-dev-jobs'.freeze
    JOB_ITEM_SELECTOR = 'tr.job'.freeze
    STORE_DIR = 'data/remote_ok'.freeze

    # Visits every job row found on the listing page and appends one CSV row
    # per job offer to the file at `filepath`, creating STORE_DIR if needed.
    #
    # @param limit [Integer, nil] stop once `@rows_count` equals this value;
    #   nil (the default) processes every row found on the page
    # @return [nil]
    def collect_jobs(limit: nil)
      puts "[Info] Getting the data from #{url}"
      FileUtils.mkdir_p STORE_DIR

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |link|
          # `break` rather than `return`: leave only the iteration so the
          # summary line below is still printed when the limit is reached.
          break if limit == @rows_count

          job_url = "#{HOST}#{link['data-url']}"
          puts "[Info] Parsing #{job_url}..."

          csv << get_row(job_url)

          @rows_count += 1
        end
      end

      puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
    end

    private

    # Fetches a single job page and extracts the values for one CSV row.
    #
    # @param job_url [String] absolute URL of the job offer page
    # @return [Array] `[job_url, location, keywords, company]`, where location
    #   and keywords are whatever Support::OfferParser returns for the markup
    #   of the page's `td.heading` element(s)
    def get_row(job_url)
      job_page = Nokogiri::HTML(open_page(job_url))
      offer_text = job_page.css('td.heading').to_s

      location = Support::OfferParser.get_location(offer_text)
      keywords = Support::OfferParser.get_keywords(offer_text)
      company = job_page.css('a.companyLink h3').text

      [job_url, location, keywords, company]
    end

    # Counts the job rows on the listing page and reports the number.
    #
    # @return [Integer] number of elements matching JOB_ITEM_SELECTOR
    def get_jobs_count
      # Ask the node set for its size directly instead of mapping every node
      # to its `data-url` only to count the resulting array.
      jobs_count = doc.css(JOB_ITEM_SELECTOR).size
      puts "[Info] There are #{jobs_count} remote jobs on [RemoteOK]."
      jobs_count
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
remote_job_scraper-0.6.0 lib/sites/remote_ok.rb
remote_job_scraper-0.5.0 lib/sites/remote_ok.rb
remote_job_scraper-0.4.4 lib/sites/remote_ok.rb