lib/disco/recommender.rb in disco-0.2.9 vs lib/disco/recommender.rb in disco-0.3.0

- old
+ new

@@ -21,11 +21,11 @@ # could also just check first few values # but may be confusing if they are all missing and later ones aren't @implicit = !train_set.any? { |v| v[:rating] } if @implicit && train_set.any? { |v| v[:value] } - warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used." + raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used." end # TODO improve performance # (catch exception instead of checking ahead of time) unless @implicit @@ -165,47 +165,40 @@ end end def similar_items(item_id, count: 5) check_fit - similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index) + similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index) end alias_method :item_recs, :similar_items def similar_users(user_id, count: 5) check_fit - similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index) + similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index) end def top_items(count: 5) check_fit raise "top_items not computed" unless @top_items if @implicit scores = Numo::UInt64.cast(@item_count) else - require "wilson_score" + min_rating = @min_rating - range = - if @min_rating == @max_rating - # TODO remove temp fix - (@min_rating - 1)..@max_rating - else - @min_rating..@max_rating - end - scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) }) + # TODO remove temp fix + min_rating -= 1 if @min_rating == @max_rating - # TODO uncomment in 0.3.0 # wilson score with continuity correction # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction - # z = 1.96 # 95% confidence - # range = @max_rating - @min_rating - # n = Numo::DFloat.cast(@item_count) - # phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n - # phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction - # scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n) - # scores = scores * range + @min_rating + z = 1.96 # 95% confidence + range = @max_rating - @min_rating + n = Numo::DFloat.cast(@item_count) + phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n + phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction + scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n) + scores = scores * range + min_rating end indexes = scores.sort_index.reverse indexes = indexes[0...[count, indexes.size].min] if count scores = scores[indexes] @@ -264,21 +257,20 @@ private # factors should already be normalized for similar users/items def create_index(factors, library:) - # TODO make Faiss the default in 0.3.0 - library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt" + library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss" case library when "faiss" require "faiss" # inner product is cosine similarity with normalized vectors # https://github.com/facebookresearch/faiss/issues/95 # - # TODO use non-exact index in 0.3.0 + # TODO add option for index type # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes # index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product) index = Faiss::IndexFlatIP.new(factors.shape[1]) # ids are from 0...total @@ -316,11 +308,11 @@ norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1)) norms[norms.eq(0)] = 1e-10 # no zeros factors / norms.expand_dims(1) end - def similar(id, map, norm_factors, count, index) + def similar(id, key, map, norm_factors, count, index) i = map[id] if i && norm_factors.shape[0] > 1 if index && count if defined?(Faiss) && index.is_a?(Faiss::Index) @@ -339,12 +331,9 @@ predictions = predictions[indexes] ids = indexes end keys = map.keys - - # TODO use user_id for similar_users in 0.3.0 - key = :item_id result = [] # items can have the same score # so original item may not be at index 0 ids.each_with_index do |id, j|