lib/github_metadata.rb in github_metadata-0.1.1 vs lib/github_metadata.rb in github_metadata-0.2.0
- old
+ new
@@ -1,8 +1,10 @@
+# encoding: utf-8
require 'rubygems'
require 'open-uri'
require 'nokogiri'
+require 'feedzirra'
# A simple scraper that fetches data from github repos that is not
# available via the API. See README for an introduction and overview.
class GithubMetadata
class RepoNotFound < StandardError; end;
@@ -15,10 +17,30 @@
# Stores the given username and the optional realname (nil when omitted)
def initialize(username, realname=nil)
  @username = username
  @realname = realname
end
end
+ # Object representation of a commit, initialized
+ # from a github repo commit feed entry
+ class Commit
+ attr_reader :title, :message, :committed_at, :url, :author
+
+ def initialize(atom_entry)
+ @atom_entry = atom_entry
+ @title = atom_entry.title
+ @message = atom_entry.content
+ @author = atom_entry.author
+ @committed_at = atom_entry.updated.kind_of?(Time) ? atom_entry.updated : Time.parse(atom_entry.updated)
+ @url = atom_entry.url
+ end
+
+ private
+ def atom_entry
+ @atom_entry
+ end
+ end
+
# Stores the user and repo this instance refers to
def initialize(user, repo)
  @user = user
  @repo = repo
end
# Similar to initialization with GithubMetadata.new, but it will immediately try
@@ -34,10 +56,22 @@
instance
rescue GithubMetadata::RepoNotFound => err
nil
end
+ def github_url
+ "https://github.com/#{user}/#{repo}/"
+ end
+
+ def contributors_url
+ File.join(github_url, 'contributors')
+ end
+
+ def commits_feed_url
+ File.join(github_url, "commits/#{default_branch}.atom")
+ end
+
# Returns an array of GithubMetadata::Contributor instances, one for each
# contributor listed on the contributors page of github
def contributors
load_contributors unless @contributors
@contributors
@@ -89,19 +123,38 @@
pull_request_link = document.at_css('a[highlight="repo_pulls"]')
return nil unless pull_request_link
pull_request_link.text[/\d+/].to_i
end
+ # Returns the default branch of the repo
+ def default_branch
+ document.at_css('.tabs .contextswitch code').text
+ end
+
+ # Returns (at most) the last 20 commits (fetched from atom feed of the default_branch)
+ # as instances of GithubMetadata::Commit
+ def recent_commits
+ @recent_commits ||= commits_feed.entries.map {|e| GithubMetadata::Commit.new(e) }
+ end
+
+ # Returns the average commit time of the recent commits. By default all of them
+ # (max 20) are averaged; pass the optional argument to average fewer commits
+ def average_recent_committed_at(num=100)
+ commit_times = recent_commits[0..num].map {|c| c.committed_at.to_f }
+ average_time = commit_times.inject(0) {|s, i| s + i} / commit_times.length
+ Time.at(average_time)
+ end
+
private
# Parsed HTML of the page at contributors_url, fetched on first use and
# memoized. Raises GithubMetadata::RepoNotFound when the request fails
# with an HTTP error.
def document
  # URI#open (provided by open-uri) is used instead of Kernel#open, which
  # no longer opens URLs on Ruby 3.0 and later
  @document ||= Nokogiri::HTML(URI.parse(contributors_url).open)
rescue OpenURI::HTTPError => err
  # Translate the HTTP failure into the library's own error class
  raise GithubMetadata::RepoNotFound, err.to_s
end
- def contributors_url
- "https://github.com/#{user}/#{repo}/contributors"
+ def commits_feed
+ @commits_feed ||= Feedzirra::Feed.fetch_and_parse(commits_feed_url)
end
def load_contributors
@contributors = document.css('#repos #watchers.members li').map do |contributor|
line = contributor.text.gsub("\n", '').squeeze(' ').strip.chomp