lib/disco/recommender.rb in disco-0.4.0 vs lib/disco/recommender.rb in disco-0.4.2
- old
+ new
@@ -97,12 +97,12 @@
@global_mean = model.bias
@user_factors = model.p_factors(format: :numo)
@item_factors = model.q_factors(format: :numo)
- @normalized_user_factors = nil
- @normalized_item_factors = nil
+ @user_norms = nil
+ @item_norms = nil
@user_recs_index = nil
@similar_users_index = nil
@similar_items_index = nil
end
@@ -170,17 +170,17 @@
end
end
def similar_items(item_id, count: 5)
check_fit
- similar(item_id, :item_id, @item_map, normalized_item_factors, count, @similar_items_index)
+ similar(item_id, :item_id, @item_map, @item_factors, item_norms, count, @similar_items_index)
end
alias_method :item_recs, :similar_items
def similar_users(user_id, count: 5)
check_fit
- similar(user_id, :user_id, @user_map, normalized_user_factors, count, @similar_users_index)
+ similar(user_id, :user_id, @user_map, @user_factors, user_norms, count, @similar_users_index)
end
def top_items(count: 5)
check_fit
raise "top_items not computed" unless @top_items
@@ -245,17 +245,17 @@
@user_recs_index = create_index(item_factors, library: "faiss")
end
def optimize_similar_items(library: nil)
check_fit
- @similar_items_index = create_index(normalized_item_factors, library: library)
+ @similar_items_index = create_index(@item_factors / item_norms.expand_dims(1), library: library)
end
alias_method :optimize_item_recs, :optimize_similar_items
def optimize_similar_users(library: nil)
check_fit
- @similar_users_index = create_index(normalized_user_factors, library: library)
+ @similar_users_index = create_index(@user_factors / user_norms.expand_dims(1), library: library)
end
def inspect
to_s # for now
end
@@ -339,40 +339,41 @@
else
raise ArgumentError, "Invalid library: #{library}"
end
end
- def normalized_user_factors
- @normalized_user_factors ||= normalize(@user_factors)
+ def user_norms
+ @user_norms ||= norms(@user_factors)
end
- def normalized_item_factors
- @normalized_item_factors ||= normalize(@item_factors)
+ def item_norms
+ @item_norms ||= norms(@item_factors)
end
- def normalize(factors)
+ def norms(factors)
norms = Numo::SFloat::Math.sqrt((factors * factors).sum(axis: 1))
norms[norms.eq(0)] = 1e-10 # no zeros
- factors / norms.expand_dims(1)
+ norms
end
- def similar(id, key, map, norm_factors, count, index)
+ def similar(id, key, map, factors, norms, count, index)
i = map[id]
- if i && norm_factors.shape[0] > 1
+ if i && factors.shape[0] > 1
if index && count
+ norm_factors = factors[i, true] / norms[i]
if defined?(Faiss) && index.is_a?(Faiss::Index)
- predictions, ids = index.search(norm_factors[i, true].expand_dims(0), count + 1).map { |v| v.to_a[0] }
+ predictions, ids = index.search(norm_factors.expand_dims(0), count + 1).map { |v| v.to_a[0] }
else
- result = index.search(norm_factors[i, true], size: count + 1)
+ result = index.search(norm_factors, size: count + 1)
# ids from batch_insert start at 1 instead of 0
ids = result.map { |v| v[:id] - 1 }
# convert cosine distance to cosine similarity
predictions = result.map { |v| 1 - v[:distance] }
end
else
- predictions = norm_factors.inner(norm_factors[i, true])
+ predictions = factors.inner(factors[i, true]) / (norms * norms[i])
indexes = predictions.sort_index.reverse
indexes = indexes[0...[count + 1, indexes.size].min] if count
predictions = predictions[indexes]
ids = indexes
end
@@ -384,9 +385,10 @@
# so original item may not be at index 0
ids.each_with_index do |id, j|
next if id == i
result << {key => keys[id], score: predictions[j]}
+ break if result.size == count
end
result
else
[]
end