lib/ai4r/data/proximity.rb in ai4r-1.5 vs lib/ai4r/data/proximity.rb in ai4r-1.6
- old
+ new
@@ -72,9 +72,27 @@
count += 1 if a[i] != b[i]
end
return count
end
+ # The "Simple matching" distance between two attribute sets is derived
+ # from the number of values present in both sets.
+ # If sets a and b have lengths da and db then:
+ #
+ # S = 2/(da + db) * Number of values present on both sets
+ # D = 1.0/S - 1
+ #
+ # Some considerations:
+ # * a and b must not include repeated items
+ # * all attributes are treated equally
+ # * if a and b share no values, S is 0 and the distance is Infinity
+ def self.simple_matching_distance(a,b)
+ # Two empty sets are identical: return 0.0 rather than the NaN that
+ # dividing 0.0 by a combined length of 0 would produce.
+ return 0.0 if a.empty? && b.empty?
+ similarity = 0.0
+ # Each item of a that also appears in b counts twice (once per set),
+ # which is the factor 2 in S = 2/(da + db) * shared.
+ a.each {|item| similarity += 2 if b.include?(item)}
+ similarity /= (a.length + b.length)
+ # When nothing is shared similarity is 0.0 and the result is Infinity.
+ return 1.0/similarity - 1
+ end
+
end
end
end
\ No newline at end of file