require 'set'

module Evalir
  class Evalirator
    # Gets the number of retrieved results
    # that were indeed relevant.
    attr_reader :true_positives

    # Gets the number of retrieved results
    # that were in fact irrelevant.
    attr_reader :false_positives

    # Creates a new Evalirator, using the
    # provided relevance judgements as the
    # basis for later calculations.
    def initialize(relevant_docids, retrieved_docids = [])
      @relevant_docids = relevant_docids.to_set
      @true_positives = @false_positives = 0
      @search_hits = []

      retrieved_docids.each do |docid|
        if @relevant_docids.include? docid
          @true_positives += 1
        else
          @false_positives += 1
        end
        @search_hits << docid
      end
    end

    # Gets the size of the evaluated set,
    # i.e. the number of search hits added.
    def size
      @search_hits.size.to_f
    end

    # Calculates the number of false negatives.
    # Divide by #size to get the rate, e.g.:
    #   fn_rate = e.false_negatives / e.size
    def false_negatives
      @relevant_docids.size - @true_positives
    end

    # Calculates the precision, i.e. the
    # fraction of retrieved documents that
    # were relevant.
    def precision
      @true_positives / size
    end

    # Calculates the recall, i.e. the
    # fraction of relevant documents that
    # were retrieved.
    def recall
      fn = false_negatives
      @true_positives / (@true_positives + fn + 0.0)
    end

    # Calculates the evenly weighted
    # harmonic mean of #precision and
    # #recall. This is equivalent to
    # calling #f_measure with a parameter
    # of 1.0.
    def f1
      f_measure(1.0)
    end

    # Calculates the weighted harmonic
    # mean of precision and recall -
    # β > 1 means emphasizing recall,
    # β < 1 means emphasizing precision.
    # β = 1 is equivalent to #f1.
    def f_measure(beta)
      beta_squared = beta ** 2
      n = (beta_squared + 1) * (precision * recall)
      d = (beta_squared * precision) + recall
      n / d
    end
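
    # Worked example (hypothetical numbers, not taken from this library):
    # with precision = 0.6 and recall = 0.75,
    #   f1             = 2 * (0.6 * 0.75) / (0.6 + 0.75)     ~= 0.667
    #   f_measure(2.0) = 5 * (0.6 * 0.75) / (4 * 0.6 + 0.75) ~= 0.714
    # i.e. beta = 2.0 rewards the higher recall more heavily.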

    # Returns the top p percent of the hits
    # that were added to this evalirator.
    def top_percent(p)
      k = size * (p / 100.0)
      @search_hits[0, k.ceil]
    end

    # The precision at rank k, meaning the
    # precision after exactly k documents
    # have been retrieved.
    def precision_at_rank(k)
      top_k = @search_hits[0, k].to_set
      (@relevant_docids & top_k).size.to_f / k
    end

    # Returns the precision at r percent
    # recall. Used to plot the Precision
    # vs. Recall curve.
    def precision_at_recall(r)
      return 1.0 if r == 0.0
      k = (size * r).ceil
      top_k = @search_hits[0, k].to_set
      (@relevant_docids & top_k).size.to_f / k
    end

    # A single-value summary, obtained by
    # computing the precision at the R-th
    # position in the ranking. Here, R is
    # the total number of relevant documents
    # for the current query.
    def r_precision
      r = @relevant_docids.size
      top_r = @search_hits[0, r].to_set
      (@relevant_docids & top_r).size.to_f / r
    end

    # Gets the data for the precision-recall
    # curve, ranging over the interval [from,
    # to], with a step size of step.
    def precision_recall_curve(from = 0, to = 100, step = 10)
      raise "From must be in the interval [0, 100)" unless (from >= 0 and from < 100)
      raise "To must be in the interval (from, 100]" unless (to > from and to <= 100)
      raise "Invalid step size - (to-from) must be divisible by step." unless ((to - from) % step) == 0

      data = []
      range = from..to
      range.step(step).each do |recall|
        data << self.precision_at_recall(recall / 100.0)
      end
      data
    end

    # The average precision - the mean of
    # #precision_at_rank taken at each rank
    # where a relevant document was retrieved,
    # normalized by the total number of
    # relevant documents.
    def average_precision
      n = 0
      avg = 0.0
      relevant = 0
      @search_hits.each do |h|
        n += 1
        if @relevant_docids.include? h
          relevant += 1
          avg += (relevant.to_f / n) / @relevant_docids.size
        end
      end
      avg
    end

    # Discounted Cumulative Gain at
    # rank k. For a relevant search
    # result at position x, its
    # contribution to the DCG is
    # 1.0 / Math.log(x, logbase). A
    # higher logbase means a smaller
    # discount for results further out.
    def dcg_at(k, logbase = 2)
      i = 1
      dcg = 0.0
      @search_hits[0, k].each do |h|
        if @relevant_docids.include? h
          dcg += i == 1 ? 1.0 : 1.0 / Math.log(i, logbase)
        end
        i += 1
      end
      dcg
    end
  end
end
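
# A minimal usage sketch (hypothetical docids), run only when this file is
# executed directly; it is not part of the library itself.
if __FILE__ == $PROGRAM_NAME
  relevant  = [1, 3, 5, 7]
  retrieved = [1, 2, 3, 4, 5]

  e = Evalir::Evalirator.new(relevant, retrieved)
  puts "Precision: #{e.precision}"  # 3 of 5 retrieved are relevant => 0.6
  puts "Recall:    #{e.recall}"     # 3 of 4 relevant were retrieved => 0.75
  puts "F1:        #{e.f1}"
  puts "AP:        #{e.average_precision}"
  puts "DCG@5:     #{e.dcg_at(5)}"
  p e.precision_recall_curve(0, 100, 20)
end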