require 'rubygems'
require 'date'
require 'thor'
require 'thor/rake_compat'
require 'bundler'
require 'github/archive'
require 'resque/tasks'

module Github
  module Archive
    # Thor-based command line interface for the GitHub Archive tooling.
    #
    # Exposes commands for configuring the MySQL/Redis connections, creating and
    # destroying the archive tables, starting Resque workers, and reporting the
    # most active repositories for a time range.
    class CLI < Thor
      # strptime pattern accepted for the --after / --before options,
      # e.g. 2013-02-03T04:05:06+07:00.
      TIME_FORMAT = '%Y-%m-%dT%H:%M:%S%z'.freeze

      # Hours used when building archive URLs: one URL per hour of each day.
      HOURS_OF_DAY = (0..23).to_a.freeze

      Bundler::GemHelper.install_tasks

      desc 'check_for_errors', "Checks resque for processing errors."
      # Runs the 'resque:failures:sort' rake task.
      def check_for_errors
        Rake::Task['resque:failures:sort'].execute
      end

      desc 'process_archives', "Starts resque work processes."
      method_option :proc_count, :type => :string, :default => '4', :required => true
      # Runs the 'resque:workers' rake task after exporting QUEUE (taken from
      # StatCollector.queue) and COUNT (the --proc_count option) via ENV.
      # proc_count is kept as a string because ENV values must be strings.
      def process_archives
        ENV['QUEUE'] = StatCollector.queue.to_s
        ENV['COUNT'] = options[:proc_count]
        Rake::Task['resque:workers'].execute
      end

      desc "setup_mysql", "Sets connection params for mysql."
      method_option :username, :type => :string, :default => 'root', :required => true
      method_option :password, :type => :string, :default => ''
      method_option :server, :type => :string, :default => 'localhost', :required => true
      method_option :database, :type => :string, :default => 'github-archive', :required => true
      # Passes the MySQL options to Connections.config_mysql and then calls
      # Connections.write_settings.
      def setup_mysql
        Connections.config_mysql(options[:server], options[:username],
                                 options[:password], options[:database])
        Connections.write_settings
      end

      desc "create_archive", "Creates all Tables"
      # Delegates to CreateArchive.do.
      def create_archive
        CreateArchive.do
      end

      desc "destroy_archive", "Destroys all Tables"
      # Delegates to DestroyArchive.do.
      def destroy_archive
        DestroyArchive.do
      end

      desc "setup_redis", "Sets connection params for redis."
      method_option :server, :type => :string, :default => 'localhost', :required => true
      method_option :port, :type => :string, :default => '6379', :required => true
      method_option :password, :type => :string, :default => nil
      # Passes the Redis options to Connections.config_redis and then calls
      # Connections.write_settings.
      def setup_redis
        Connections.config_redis(options[:server], options[:port], options[:password])
        Connections.write_settings
      end

      desc "gh_repo_stats", "Lists the most active repositories for a given time range."
      method_option :after, :type => :string, :default => '2013-02-03T04:05:06+07:00', :required => true
      method_option :before, :type => :string, :default => '2013-03-03T04:05:06+07:00', :required => true
      method_option :event, :type => :string, :default => 'GollumEvent', :required => true
      method_option :n, :type => :numeric, :default => 100, :required => true
      # Prints "<url> - <count>" lines for the requested range and event type
      # once every hourly archive covering the range has finished processing.
      # Archives that have no ArchivedUrl record yet are recorded and enqueued,
      # and the command reports how many hours are still outstanding instead.
      #
      # Exits with status 1 when a timestamp cannot be parsed or when the range
      # contains no days (e.g. --before is earlier than --after).
      def gh_repo_stats
        date_from = build_date_time(options[:after])
        date_to   = build_date_time(options[:before])

        # URLs are built for every hour of every calendar day touched by the
        # range; the time-of-day part of the bounds is not used here.
        days = (date_from.to_date..date_to.to_date).to_a
        urls = build_url_array_from_dates(days)

        if urls.empty?
          puts "Bad time range. Please enter a correct time range."
          exit 1
        end

        hour_count = schedule_missing_archives(urls)

        if hour_count.zero?
          results = Event.results_for_range_and_type(date_from..date_to, options[:event], options[:n])
          results.each do |url, count|
            puts "#{url} - #{count} \n"
          end
        else
          puts "You still need #{hour_count} hours of data to calculate this.."
        end
      end

      desc "event_count", "Gets the current event count"
      # Prints Event.count.
      def event_count
        puts Event.count
      end

      private

      # Parses +time_str+ using TIME_FORMAT.
      #
      # @param time_str [String] timestamp such as 2013-02-03T04:05:06+07:00
      # @return [DateTime] the parsed timestamp
      #
      # On unparsable input prints a hint and terminates the process with a
      # non-zero status so that calling scripts can detect the failure.
      def build_date_time(time_str)
        DateTime.strptime(time_str, TIME_FORMAT)
      rescue ArgumentError, TypeError
        puts "Bad time format for #{time_str}. Please use something like 2013-02-03T04:05:06+07:00"
        exit 1
      end

      # Builds one archive URL per hour (0..23, not zero-padded) for each date.
      #
      # @param date_array [Array<Date>] days to cover
      # @return [Array<String>] URLs in day-then-hour order; empty when
      #   +date_array+ is empty
      def build_url_array_from_dates(date_array)
        date_array.flat_map do |date|
          day = date.strftime("%Y-%m-%d")
          HOURS_OF_DAY.map { |hour| "http://data.githubarchive.org/#{day}-#{hour}.json.gz" }
        end
      end

      # Creates an ArchivedUrl record and enqueues a StatCollector job for every
      # URL that has no record yet.
      #
      # @param urls [Array<String>] archive URLs to check
      # @return [Integer] number of URLs whose archive has not finished
      #   processing (newly enqueued ones included)
      def schedule_missing_archives(urls)
        urls.count do |url|
          archived = ArchivedUrl.where(url: url).first
          next false if archived && archived.finished_processing

          if archived.nil?
            ArchivedUrl.create(url: url, finished_processing: false)
            Resque.enqueue(StatCollector, url)
          end
          true
        end
      end
    end
  end
end

Github::Archive::CLI.start