Sha256: ca3a8518d54dd3e66a0e8081e4241f2d822bb9e97034c3c10a76e0879db4ab17

Contents?: true

Size: 1.03 KB

Versions: 2

Compression:

Stored size: 1.03 KB

Contents

module Sites
  # Scraper for the company table published in the GitHub repository
  # remoteintech/remote-jobs. Each table row is exported as one CSV line
  # of the form: name, website, region.
  #
  # NOTE(review): `open_page`, `url`, `doc` and `filepath` are not defined in
  # this file; they are presumably provided by Base (readers for @url/@doc and
  # a path built from STORE_DIR and @timestamp) -- confirm against Base.
  class GithubRemoteJobs < Base
    HOST = 'http://github.com/'.freeze
    PATH = 'remoteintech/remote-jobs'.freeze
    JOB_ITEM_SELECTOR = '.entry-content table tbody tr'.freeze
    STORE_DIR = 'data/github_remote_jobs'.freeze

    # Number of leading <td> cells read from every row (name, website, region).
    COLUMNS_PER_ROW = 3

    # Builds the page URL from HOST and PATH, records the creation time
    # (plus a YYYYMMDDHHMMSS timestamp string) and downloads and parses the
    # page right away, so constructing an instance calls `open_page`.
    def initialize
      @url = "#{self.class::HOST}#{self.class::PATH}"
      @current_time = Time.now
      @timestamp = @current_time.strftime('%Y%m%d%H%M%S')
      @doc = Nokogiri::HTML(open_page(@url))
      @rows_count = 0
    end

    # Writes one CSV line (name, website, region) per matching table row to
    # `filepath`, creating STORE_DIR first, and prints progress to stdout.
    #
    # Rows with fewer than COLUMNS_PER_ROW <td> cells are skipped instead of
    # raising NoMethodError on a missing cell.
    #
    # @return [nil] the result of the final `puts`
    def collect_companies
      puts "[Info] Getting the data from #{url}"
      FileUtils.mkdir_p STORE_DIR

      # The file is opened in 'w' mode and therefore rewritten from scratch,
      # so the counter has to restart too; otherwise a second call on the
      # same instance would report an accumulated total.
      @rows_count = 0

      CSV.open(filepath, 'w') do |csv|
        doc.css(JOB_ITEM_SELECTOR).each do |tr|
          cells = tr.search('td')
          next if cells.size < COLUMNS_PER_ROW

          csv << cells.first(COLUMNS_PER_ROW).map(&:text)
          @rows_count += 1
        end
      end
      puts "[Done] Collected #{@rows_count} job offers from #{url}. Data stored in: #{filepath}."
    end

    # @return [Integer] number of rows written by the most recent
    #   #collect_companies call (0 before the first call)
    def companies_count
      @rows_count
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
remote_job_scraper-0.6.0 lib/sites/github_remote_jobs.rb
remote_job_scraper-0.5.0 lib/sites/github_remote_jobs.rb