lib/ai4r/clusterers/complete_linkage.rb in ai4r-1.5 vs lib/ai4r/clusterers/complete_linkage.rb in ai4r-1.6
- old
+ new
@@ -11,18 +11,28 @@
require File.dirname(__FILE__) + '/../clusterers/single_linkage'
module Ai4r
module Clusterers
- # Implementation of a Hierarchical clusterer with complete linkage.
+ # Implementation of a Hierarchical clusterer with complete linkage (Everitt
+ # et al., 2001; Jain and Dubes, 1988; Sorensen, 1948).
# Hierarchical clusterers create one cluster per element, and then
# progressively merge clusters, until the required number of clusters
# is reached.
# With complete linkage, the distance between two clusters is computed as
# the maximum distance between elements of each cluster.
+ #
+ # D(cx, (ci U cj)) = max(D(cx, ci), D(cx, cj))
class CompleteLinkage < SingleLinkage
+ parameters_info :distance_function =>
+ "Custom implementation of distance function. " +
+ "It must be a closure receiving two data items and return the " +
+ "distance bewteen them. By default, this algorithm uses " +
+ "ecuclidean distance of numeric attributes to the power of 2."
+
+
# Build a new clusterer, using data examples found in data_set.
# Items will be clustered in "number_of_clusters" different
# clusters.
def build(data_set, number_of_clusters)
super
@@ -34,25 +44,20 @@
super
end
protected
- # Calculate cluster distance using the complete linkage method
- def calc_index_clusters_distance(cluster_a, cluster_b)
- max_dist = 0
- cluster_a.each do |index_a|
- cluster_b.each do |index_b|
- dist = read_distance_matrix(index_a, index_b)
- max_dist = dist if dist > max_dist
- end
- end
- return max_dist
+ # return distance between cluster cx and new cluster (ci U cj),
+ # using complete linkage
+ def linkage_distance(cx, ci, cj)
+ [read_distance_matrix(cx, ci),
+ read_distance_matrix(cx, cj)].max
end
def distance_between_item_and_cluster(data_item, cluster)
max_dist = 0
cluster.data_items.each do |another_item|
- dist = distance(data_item, another_item)
+ dist = @distance_function.call(data_item, another_item)
max_dist = dist if dist > max_dist
end
return max_dist
end