lib/disco/recommender.rb in disco-0.2.9 vs lib/disco/recommender.rb in disco-0.3.0
- old
+ new
@@ -21,11 +21,11 @@
# could also just check first few values
# but may be confusing if they are all missing and later ones aren't
@implicit = !train_set.any? { |v| v[:rating] }
if @implicit && train_set.any? { |v| v[:value] }
- warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used."
+ raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
end
# TODO improve performance
# (catch exception instead of checking ahead of time)
unless @implicit
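
With this change, passing `:value` alongside implicit feedback stops being a warning and becomes a hard error. A minimal sketch of the new behavior, using hypothetical data:

    require "disco"

    recommender = Disco::Recommender.new
    # implicit feedback (no :rating key) with a stray :value key
    recommender.fit([{user_id: 1, item_id: 100, value: 3}])
    # 0.2.9: prints "[disco] WARNING: ..." and continues
    # 0.3.0: raises ArgumentError before training
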
@@ -165,47 +165,40 @@
end
end
def similar_items(item_id, count: 5)
check_fit
- similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
+ similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
end
alias_method :item_recs, :similar_items
def similar_users(user_id, count: 5)
check_fit
- similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
+ similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
end
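
Threading the key through similar() means similar_users results are now labeled with :user_id instead of the misleading :item_id used in 0.2.9 (see the removal of the hardcoded key = :item_id further down). A rough usage sketch, assuming a fitted recommender:

    recommender.similar_users(42, count: 3)
    # 0.2.9 => [{item_id: 7, score: ...}, ...]   (wrong key name)
    # 0.3.0 => [{user_id: 7, score: ...}, ...]

    recommender.similar_items("item-a", count: 3)
    # unchanged: results are still keyed by :item_id
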
def top_items(count: 5)
check_fit
raise "top_items not computed" unless @top_items
if @implicit
scores = Numo::UInt64.cast(@item_count)
else
- require "wilson_score"
+ min_rating = @min_rating
- range =
- if @min_rating == @max_rating
- # TODO remove temp fix
- (@min_rating - 1)..@max_rating
- else
- @min_rating..@max_rating
- end
- scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
+ # TODO remove temp fix
+ min_rating -= 1 if @min_rating == @max_rating
- # TODO uncomment in 0.3.0
# wilson score with continuity correction
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
- # z = 1.96 # 95% confidence
- # range = @max_rating - @min_rating
- # n = Numo::DFloat.cast(@item_count)
- # phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n
- # phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
- # scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
- # scores = scores * range + @min_rating
+ z = 1.96 # 95% confidence
+ range = @max_rating - min_rating
+ n = Numo::DFloat.cast(@item_count)
+ phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
+ phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
+ scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
+ scores = scores * range + min_rating
end
indexes = scores.sort_index.reverse
indexes = indexes[0...[count, indexes.size].min] if count
scores = scores[indexes]
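
The previously commented-out Wilson interval now runs directly on Numo arrays instead of going through the wilson_score gem. For explicit feedback it ranks items by the lower bound of a 95% confidence interval on the mean rating, mapped to a 0..1 proportion and back. A standalone sketch of the same arithmetic for a single hypothetical item (20 ratings summing to 90 on a 1..5 scale):

    z = 1.96                                   # 95% confidence
    min_rating, max_rating = 1.0, 5.0
    sum, n = 90.0, 20.0
    range = max_rating - min_rating
    phat = (sum - min_rating * n) / range / n  # mean rating mapped to 0..1 (0.875 here)
    phat = [phat - 1 / (2 * n), 0].max         # continuity correction
    lower = (phat + z**2 / (2 * n) -
             z * Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) /
            (1 + z**2 / n)
    score = lower * range + min_rating         # back on the rating scale, ~3.56 here
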
@@ -264,21 +257,20 @@
private
# factors should already be normalized for similar users/items
def create_index(factors, library:)
- # TODO make Faiss the default in 0.3.0
- library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
+ library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
case library
when "faiss"
require "faiss"
# inner product is cosine similarity with normalized vectors
# https://github.com/facebookresearch/faiss/issues/95
#
- # TODO use non-exact index in 0.3.0
+ # TODO add option for index type
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
# index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
index = Faiss::IndexFlatIP.new(factors.shape[1])
# ids are from 0...total
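
The default index library flips here: 0.2.9 only chose Faiss when NGT was not loaded, while 0.3.0 prefers Faiss whenever it is available and falls back to NGT only if the ngt gem is loaded without faiss. A rough sketch of how these indexes get built, assuming the optimize_item_recs / optimize_similar_users helpers from the gem's README (their names and the library: option are not part of this diff):

    require "disco"
    require "faiss"   # with both faiss and ngt loaded, 0.3.0 now picks Faiss

    recommender = Disco::Recommender.new
    recommender.fit(ratings)            # hypothetical training data
    recommender.optimize_item_recs      # builds the index used by item_recs / similar_items
    recommender.optimize_similar_users  # builds the index used by similar_users
    # to keep the 0.2.9 behavior, request NGT explicitly (assumed pass-through):
    # recommender.optimize_item_recs(library: "ngt")
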
@@ -316,11 +308,11 @@
norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
norms[norms.eq(0)] = 1e-10 # no zeros
factors / norms.expand_dims(1)
end
- def similar(id, map, norm_factors, count, index)
+ def similar(id, key, map, norm_factors, count, index)
i = map[id]
if i && norm_factors.shape[0] > 1
if index && count
if defined?(Faiss) && index.is_a?(Faiss::Index)
@@ -339,12 +331,9 @@
predictions = predictions[indexes]
ids = indexes
end
keys = map.keys
-
- # TODO use user_id for similar_users in 0.3.0
- key = :item_id
result = []
# items can have the same score
# so original item may not be at index 0
ids.each_with_index do |id, j|
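
The extra key argument is what lets similar() label results correctly for both methods; the lookup itself is unchanged and still relies on the factors being L2-normalized first (via the helper above), so that inner product equals cosine similarity. A small standalone Numo sketch of that normalization step, with made-up factors:

    require "numo/narray"

    factors = Numo::SFloat.cast([[1.0, 2.0, 2.0], [0.0, 0.0, 0.0]])
    norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
    norms[norms.eq(0)] = 1e-10            # avoid dividing a zero row by zero
    normalized = factors / norms.expand_dims(1)
    # each non-zero row now has unit length, so normalized.dot(normalized.transpose)
    # gives cosine similarities
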