module Hyrax
  # Caches work view, file view & file download stats for all users.
  # This is called by 'rake hyrax:stats:user_stats'.
  class UserStatImporter
    # Lightweight record of a user plus the date their stats should next be
    # imported from (see #sorted_users).
    UserRecord = Struct.new("UserRecord", :id, :user_key, :last_stats_update)

    # @param options [Hash]
    # @option options [Boolean] :verbose when truthy, Rails.logger output is
    #   additionally broadcast to a STDOUT logger set to INFO level
    # @option options [Boolean] :logging when truthy, #log_message writes to Rails.logger
    # @option options [#to_f] :delay_secs seconds to sleep between stat requests
    #   (nil becomes 0.0)
    # @option options [#to_i] :number_of_retries maximum number of attempts made for
    #   each stat request; a single attempt is always made, even when this is 0 or nil
    def initialize(options = {})
      if options[:verbose]
        stdout_logger = Logger.new(STDOUT)
        stdout_logger.level = Logger::INFO
        Rails.logger.extend(ActiveSupport::Logger.broadcast(stdout_logger))
      end
      @logging = options[:logging]
      @delay_secs = options[:delay_secs].to_f
      @number_of_retries = options[:number_of_retries].to_i
    end

    delegate :depositor_field, to: DepositSearchBuilder

    # Imports file view, file download and work view stats for every user and
    # stores the per-day totals in UserStat rows.
    def import
      log_message('Begin import of User stats.')
      sorted_users.each do |user|
        start_date = date_since_last_cache(user)
        # this user has already been processed today; continue without delay
        next if start_date.to_date >= Time.zone.today

        stats = {}
        process_files(stats, user, start_date)
        process_works(stats, user, start_date)
        create_or_update_user_stats(stats, user)
      end
      log_message('User stats import complete.')
    end

    # Returns an array of users sorted by the date of their last stats update.
    # Users that have not been recently updated will be at the top of the array.
    #
    # @return [Array<UserRecord>]
    def sorted_users
      users = []
      ::User.find_each do |user|
        users.push(UserRecord.new(user.id, user.user_key, date_since_last_cache(user)))
      end
      users.sort_by(&:last_stats_update)
    end

    private

    # Tallies view and download stats for every file deposited by +user+ into
    # +stats+ (mutated in place by #tally_results), pausing after each request.
    def process_files(stats, user, start_date)
      file_ids_for_user(user).each do |file_id|
        file = ::FileSet.find(file_id)
        view_stats = rescue_and_retry("Retried FileViewStat on #{user} for file #{file_id} too many times.") { FileViewStat.statistics(file, start_date, user.id) }
        stats = tally_results(view_stats, :views, stats) unless view_stats.blank?
        delay
        dl_stats = rescue_and_retry("Retried FileDownloadStat on #{user} for file #{file_id} too many times.") { FileDownloadStat.statistics(file, start_date, user.id) }
        stats = tally_results(dl_stats, :downloads, stats) unless dl_stats.blank?
        delay
      end
    end

    # Tallies view stats for every work deposited by +user+ into +stats+
    # (mutated in place by #tally_results), pausing after each request.
    def process_works(stats, user, start_date)
      work_ids_for_user(user).each do |work_id|
        work = Hyrax::WorkRelation.new.find(work_id)
        work_stats = rescue_and_retry("Retried WorkViewStat on #{user} for work #{work_id} too many times.") { WorkViewStat.statistics(work, start_date, user.id) }
        stats = tally_results(work_stats, :work_views, stats) unless work_stats.blank?
        delay
      end
    end

    # Sleeps for the configured number of seconds between requests.
    def delay
      sleep @delay_secs
    end

    # Runs the given block, re-running it (after a #delay) when it raises a
    # StandardError, until @number_of_retries attempts have been made.
    #
    # @param fail_message [String] logged once the attempts are exhausted
    # @return [Object, nil] the block's value, or nil when every attempt failed
    def rescue_and_retry(fail_message)
      retry_count = 0
      begin
        yield
      rescue StandardError => e
        retry_count += 1
        if retry_count < @number_of_retries
          delay
          retry
        end
        log_message fail_message
        log_message "Last exception #{e}"
        # Return nil explicitly. Otherwise the value of the last log_message call
        # (the logger's return value when logging is enabled) would be handed back
        # to callers, pass their `blank?` guard and be iterated by #tally_results.
        nil
      end
    end

    # @return the day after the user's most recent UserStat row, or the
    #   configured analytics start date when the user has no cached stats
    def date_since_last_cache(user)
      last_cached_stat = UserStat.where(user_id: user.id).order(date: :asc).last

      if last_cached_stat
        last_cached_stat.date + 1.day
      else
        Hyrax.config.analytic_start_date
      end
    end

    # @return [Array] ids of the FileSets whose depositor field matches the user's key
    def file_ids_for_user(user)
      ids = []
      ::FileSet.search_in_batches("#{depositor_field}:\"#{user.user_key}\"", fl: "id") do |group|
        ids.concat group.map { |doc| doc["id"] }
      end
      ids
    end

    # @return [Array] ids of the works whose depositor field matches the user's key
    def work_ids_for_user(user)
      ids = []
      Hyrax::WorkRelation.new.search_in_batches("#{depositor_field}:\"#{user.user_key}\"", fl: "id") do |group|
        ids.concat group.map { |doc| doc["id"] }
      end
      ids
    end

    # For each date, add the view and download counts for this file to the view & download sub-totals for that day.
    # The resulting hash will look something like this: {"2014-11-30 00:00:00 UTC" => {:views=>2, :downloads=>5},
    # "2014-12-01 00:00:00 UTC" => {:views=>4, :downloads=>4}}
    #
    # @param current_stats [#each] stat entries responding to +date+ and to +stat_name+
    # @param stat_name [Symbol] counter to read from each entry and accumulate
    # @param total_stats [Hash] running totals keyed by date string; mutated in place
    # @return [Hash] +total_stats+
    def tally_results(current_stats, stat_name, total_stats)
      current_stats.each do |stats|
        # Exclude the stats from today since it will only be a partial day's worth of data.
        # NOTE(review): `break` also skips any entries after today's; presumably the
        # entries arrive in ascending date order -- confirm.
        break if stats.date == Time.zone.today

        date_key = stats.date.to_s
        old_count = total_stats[date_key] ? total_stats[date_key].fetch(stat_name) { 0 } : 0
        new_count = old_count + stats.method(stat_name).call

        old_values = total_stats[date_key] || {}
        total_stats.store(date_key, old_values)
        total_stats[date_key].store(stat_name, new_count)
      end
      total_stats
    end

    # Persists one UserStat row per date in +stats+, creating the row when it
    # does not exist yet. Counters missing from a day's data are stored as 0.
    #
    # @param stats [Hash] totals keyed by date string, as built by #tally_results
    def create_or_update_user_stats(stats, user)
      stats.each do |date_string, data|
        date = Time.zone.parse(date_string)

        user_stat = UserStat.where(user_id: user.id, date: date).first_or_initialize(user_id: user.id, date: date)

        user_stat.file_views = data.fetch(:views, 0)
        user_stat.file_downloads = data.fetch(:downloads, 0)
        user_stat.work_views = data.fetch(:work_views, 0)
        user_stat.save!
      end
    end

    # Writes +message+ to Rails.logger at INFO level, prefixed with the class
    # name, when logging was enabled at construction.
    def log_message(message)
      Rails.logger.info "#{self.class}: #{message}" if @logging
    end
  end
end