recommender.rb in disco-0.3.0

- old
+ new

@@ -21,11 +21,11 @@
       # could also just check first few values
       # but may be confusing if they are all missing and later ones aren't
       @implicit = !train_set.any? { |v| v[:rating] }
 
       if @implicit && train_set.any? { |v| v[:value] }
-        warn "[disco] WARNING: Passing `:value` with implicit feedback has no effect on recommendations and can be removed. Earlier versions of the library incorrectly stated this was used."
+        raise ArgumentError, "Passing `:value` with implicit feedback has no effect on recommendations and should be removed. Earlier versions of the library incorrectly stated this was used."
       end
 
       # TODO improve performance
       # (catch exception instead of checking ahead of time)
       unless @implicit
@@ -165,47 +165,40 @@
       end
     end
 
     def similar_items(item_id, count: 5)
       check_fit
-      similar(item_id, @item_map, normalized_item_factors, count, @similar_items_index)
+      similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
     end
     alias_method :item_recs, :similar_items
 
     def similar_users(user_id, count: 5)
       check_fit
-      similar(user_id, @user_map, normalized_user_factors, count, @similar_users_index)
+      similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
     end
 
     def top_items(count: 5)
       check_fit
       raise "top_items not computed" unless @top_items
 
       if @implicit
         scores = Numo::UInt64.cast(@item_count)
       else
-        require "wilson_score"
+        min_rating = @min_rating
 
-        range =
-          if @min_rating == @max_rating
-            # TODO remove temp fix
-            (@min_rating - 1)..@max_rating
-          else
-            @min_rating..@max_rating
-          end
-        scores = Numo::DFloat.cast(@item_sum.zip(@item_count).map { |s, c| WilsonScore.rating_lower_bound(s / c, c, range) })
+        # TODO remove temp fix: when every rating is identical, widen the scale by one so the range below is not zero
+        min_rating -= 1 if @min_rating == @max_rating
 
-        # TODO uncomment in 0.3.0
         # wilson score with continuity correction
         # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval_with_continuity_correction
-        # z = 1.96 # 95% confidence
-        # range = @max_rating - @min_rating
-        # n = Numo::DFloat.cast(@item_count)
-        # phat = (Numo::DFloat.cast(@item_sum) - (@min_rating * n)) / range / n
-        # phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
-        # scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
-        # scores = scores * range + @min_rating
+        z = 1.96 # 95% confidence
+        range = @max_rating - min_rating # must use the adjusted minimum, otherwise range is 0 (division by zero) when all ratings are equal
+        n = Numo::DFloat.cast(@item_count)
+        phat = (Numo::DFloat.cast(@item_sum) - (min_rating * n)) / range / n
+        phat = (phat - (1 / (2 * n))).clip(0, nil) # continuity correction
+        scores = (phat + z**2 / (2 * n) - z * Numo::DFloat::Math.sqrt((phat * (1 - phat) + z**2 / (4 * n)) / n)) / (1 + z**2 / n)
+        scores = scores * range + min_rating
       end
 
       indexes = scores.sort_index.reverse
       indexes = indexes[0...[count, indexes.size].min] if count
       scores = scores[indexes]
@@ -264,21 +257,20 @@
 
     private
 
     # factors should already be normalized for similar users/items
     def create_index(factors, library:)
-      # TODO make Faiss the default in 0.3.0
-      library ||= defined?(Faiss) && !defined?(Ngt) ? "faiss" : "ngt"
+      library ||= defined?(Ngt) && !defined?(Faiss) ? "ngt" : "faiss"
 
       case library
       when "faiss"
         require "faiss"
 
         # inner product is cosine similarity with normalized vectors
         # https://github.com/facebookresearch/faiss/issues/95
         #
-        # TODO use non-exact index in 0.3.0
+        # TODO add option for index type
         # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
         # index = Faiss::IndexHNSWFlat.new(factors.shape[1], 32, :inner_product)
         index = Faiss::IndexFlatIP.new(factors.shape[1])
 
         # ids are from 0...total
@@ -316,11 +308,11 @@
       norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
       norms[norms.eq(0)] = 1e-10 # no zeros
       factors / norms.expand_dims(1)
     end
 
-    def similar(id, map, norm_factors, count, index)
+    def similar(id, key, map, norm_factors, count, index)
       i = map[id]
 
       if i && norm_factors.shape[0] > 1
         if index && count
           if defined?(Faiss) && index.is_a?(Faiss::Index)
@@ -339,12 +331,9 @@
           predictions = predictions[indexes]
           ids = indexes
         end
 
         keys = map.keys
-
-        # TODO use user_id for similar_users in 0.3.0
-        key = :item_id
 
         result = []
         # items can have the same score
         # so original item may not be at index 0
         ids.each_with_index do |id, j|