lib/ai4r/data/proximity.rb in ai4r-1.12 vs lib/ai4r/data/proximity.rb in ai4r-1.13
- old
+ new
@@ -90,9 +90,31 @@
a.each {|item| similarity += 2 if b.include?(item)}
similarity /= (a.length + b.length)
return 1.0/similarity - 1
end
# Cosine distance between two vectors: 1 minus the cosine similarity,
# where cosine similarity is the cosine of the angle between the two
# vectors (http://en.wikipedia.org/wiki/Cosine_similarity).
#
# Parameters a and b are vectors with continuous attributes. Elements are
# read by index over the positions of a, so b is expected to have the same
# length as a.
#
#   D = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]**2)) * sqrt(sum(b[i]**2)))
#
# Returns a Float: 0.0 for vectors pointing in the same direction, 1.0 for
# orthogonal vectors and 2.0 for opposite vectors (up to floating-point
# rounding). If a is empty or either vector has zero magnitude, the
# division is 0.0 / 0.0 and the result is NaN.
def self.cosine_distance(a, b)
  # Float accumulators keep the final division in floating point even
  # when the attributes are Integers.
  dot_product = 0.0
  norm_a = 0.0
  norm_b = 0.0

  a.each_index do |i|
    dot_product += a[i] * b[i]
    norm_a += a[i] ** 2
    norm_b += b[i] ** 2
  end

  # Product of the two Euclidean norms; this is the denominator of the
  # cosine similarity.
  magnitude = Math.sqrt(norm_a) * Math.sqrt(norm_b)
  return 1 - (dot_product / magnitude)
end
end
end
end