Sha256: 06476f18313a876af3e314352ab5651980521397b614fac72302971f01bd41c1
Contents?: true
Size: 1.85 KB
Versions: 1
Compression:
Stored size: 1.85 KB
Contents
require 'cgi'
require 'mechanize'
require 'addressable/uri'

module GitTrend
  # Fetches GitHub's trending page with a Mechanize agent and extracts
  # trending projects and the selectable languages from its markup.
  class Scraper
    BASE_HOST = 'https://github.com'.freeze
    BASE_URL = "#{BASE_HOST}/trending".freeze

    # Captures the still URL-encoded value of the `l` query parameter from a
    # trending-page language link. The capture stops at `&` / `#` so that any
    # additional query parameter or fragment is not mistaken for part of the
    # language name.
    LANGUAGE_HREF_PATTERN = %r{github\.com/trending\?l=([^&#]+)}
    private_constant :LANGUAGE_HREF_PATTERN

    # Creates the Mechanize agent. When the `http_proxy` environment variable
    # is set, its host, port, user and password are passed to the agent as
    # proxy settings.
    def initialize
      @agent = Mechanize.new
      proxy_url = ENV['http_proxy']
      return unless proxy_url

      proxy = URI.parse(proxy_url)
      @agent.set_proxy(proxy.host, proxy.port, proxy.user, proxy.password)
    end

    # Downloads the trending page and builds one Project per
    # `.leaderboard-list-content` element found on it.
    #
    # @param language [String, nil] value sent as the `l` query parameter
    # @param since [String, nil] value sent as the `since` query parameter
    # @return [Array<Project>] projects in page order; empty when the page
    #   contains no matching elements
    def get(language = nil, since = nil)
      page = @agent.get(generate_url_for_get(language, since))

      page.search('.leaderboard-list-content').map do |content|
        project = Project.new
        project.lang = content.search('.repo-leaderboard-title .title-meta').text
        project.name = content.search('.repo-leaderboard-title a').text
        project.description = content.search('.repo-leaderboard-description').text
        project.star_count = meta_count(
          content.search('.repo-leaderboard-meta .repo-leaderboard-meta-item .octicon-star')
        )
        project.fork_count = meta_count(
          content.search('.repo-leaderboard-meta .repo-leaderboard-meta-item .octicon-git-branch')
        )
        project
      end
    end

    # Downloads the trending page and collects the language names offered by
    # its `div.select-menu-item a` links.
    #
    # Links without an `href`, or whose `href` is not a trending language
    # link, are skipped.
    #
    # @return [Array<String>] URL-decoded language names in page order
    def list_all_languages
      page = @agent.get(BASE_URL)

      page.search('div.select-menu-item a').map do |link|
        # e.g. "https://github.com/trending?l=objective-c%2B%2B" => "objective-c++"
        encoded = link['href'].to_s[LANGUAGE_HREF_PATTERN, 1]
        CGI.unescape(encoded) if encoded
      end.compact
    end

    private

    # Builds the URL to request for #get.
    #
    # @param language [String, nil] becomes the `l` query parameter unless nil
    # @param since [String, nil] becomes the `since` query parameter unless nil
    # @return [String] BASE_URL, with a query string only when at least one
    #   argument is given
    def generate_url_for_get(language, since)
      uri = Addressable::URI.parse(BASE_URL)
      if language || since
        params = { l: language, since: since }
        uri.query_values = params.reject { |_key, value| value.nil? }
      end
      uri.to_s
    end

    # Reads the counter displayed beside an icon element.
    #
    # @param elm [#empty?, #[]] search result holding the icon element(s)
    # @return [Integer] the text of the first element's parent with
    #   surrounding whitespace and commas removed, converted with `to_i`;
    #   0 when nothing was matched
    def meta_count(elm)
      return 0 if elm.empty?

      elm[0].parent.text.strip.delete(',').to_i
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
git-trend-0.0.8 | lib/git_trend/scraper.rb |