lib/ai4r/data/proximity.rb in ai4r-1.12 vs lib/ai4r/data/proximity.rb in ai4r-1.13

- old
+ new

# Cosine distance between two vectors: one minus the cosine of the
# angle between them (http://en.wikipedia.org/wiki/Cosine_similarity).
#
# Parameters a and b are vectors with continuous attributes. Only the
# positions present in a are visited, so b must have at least as many
# elements as a.
#
#   D = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]**2)) * sqrt(sum(b[i]**2)))
#
# The result is 0.0 for vectors pointing the same way, 1.0 for
# orthogonal vectors and 2.0 for opposite vectors.
#
# The angle is undefined when either vector has zero magnitude. Rather
# than returning NaN in that case, two zero vectors are reported as
# identical (0.0) and a zero vector against a non-zero one as sharing
# no direction (1.0).
def self.cosine_distance(a, b)
  dot_product = 0.0
  norm_a = 0.0
  norm_b = 0.0

  a.each_index do |i|
    dot_product += a[i] * b[i]
    norm_a += a[i]**2
    norm_b += b[i]**2
  end

  magnitude = Math.sqrt(norm_a) * Math.sqrt(norm_b)

  # Guard the division: 0.0 / 0.0 would yield NaN, which poisons every
  # comparison a caller makes with the returned distance.
  if magnitude.zero?
    return norm_a.zero? && norm_b.zero? ? 0.0 : 1.0
  end

  1 - (dot_product / magnitude)
end