# Author::    Sergio Fierens
# License::   MPL 1.1
# Project::   ai4r
# Url::       http://ai4r.rubyforge.org/
#
# You can redistribute it and/or modify it under the terms of 
# the Mozilla Public License version 1.1  as published by the 
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

module Ai4r
  module Data

    # Classical distance (dissimilarity) functions between attribute vectors.
    #
    # Unless stated otherwise, a function visits every position +i+ of +a+
    # and reads the matching value with <tt>b[i]</tt>. No length check is
    # performed: positions of +b+ beyond the length of +a+ are never read,
    # so callers should pass vectors of equal length.
    module Proximity

      # Squared Euclidean distance: the sum of the squared differences
      # between matching positions of +a+ and +b+.
      # It is a cheaper replacement for the Euclidean distance, because it
      # skips the square root.
      # Parameters a and b are vectors with continuous attributes.
      # The sum starts at 0.0, so 0.0 is returned when +a+ is empty.
      def self.squared_euclidean_distance(a, b)
        a.each_with_index.inject(0.0) do |sum, (item_a, i)|
          sum + (item_a - b[i])**2
        end
      end

      # Euclidean distance, or L2 norm: the square root of
      # squared_euclidean_distance.
      # Parameters a and b are vectors with continuous attributes.
      # Euclidean distance tends to form hyperspherical
      # clusters (Clustering, Xu and Wunsch, 2009).
      # Translations and rotations do not cause a
      # distortion in distance relation (Duda et al, 2001).
      # If attributes are measured with different units,
      # attributes with larger values and variance will
      # dominate the metric.
      def self.euclidean_distance(a, b)
        Math.sqrt(squared_euclidean_distance(a, b))
      end

      # City block, Manhattan distance, or L1 norm: the sum of the absolute
      # differences between matching positions of +a+ and +b+.
      # Parameters a and b are vectors with continuous attributes.
      # The sum starts at 0.0, so 0.0 is returned when +a+ is empty.
      def self.manhattan_distance(a, b)
        a.each_with_index.inject(0.0) do |sum, (item_a, i)|
          sum + (item_a - b[i]).abs
        end
      end

      # Sup distance, or L-infinity norm: the largest absolute difference
      # between matching positions of +a+ and +b+.
      # Parameters a and b are vectors with continuous attributes.
      # Returns 0.0 when no difference is greater than zero (including
      # when +a+ is empty); otherwise the largest difference itself is
      # returned, as computed by the subtraction.
      def self.sup_distance(a, b)
        a.each_with_index.inject(0.0) do |largest, (item_a, i)|
          diff = (item_a - b[i]).abs
          # Strict comparison: the first of several equal maxima is kept.
          diff > largest ? diff : largest
        end
      end

      # The Hamming distance between two attribute vectors of equal
      # length is the number of positions at which the corresponding
      # values are different (compared with <tt>!=</tt>).
      # This distance function is frequently used with binary attributes,
      # though it can be used with other discrete attributes.
      def self.hamming_distance(a, b)
        a.each_index.count { |i| a[i] != b[i] }
      end

      # The "Simple matching" distance between two attribute sets is
      # derived from the number of values present on both sets.
      # If sets a and b have lengths da and db then:
      #
      #  S = 2/(da + db) * Number of values present on both sets
      #  D = 1.0/S - 1
      #
      # Some considerations:
      # * a and b must not include repeated items
      # * all attributes are treated equally
      # * membership is tested with <tt>b.include?(item)</tt>
      #
      # Special cases:
      # * two empty sets are identical, so the distance is 0.0
      #   (the formula alone would divide zero by zero)
      # * sets with no value in common have S = 0.0, so the
      #   distance is Infinity
      def self.simple_matching_distance(a, b)
        total = a.length + b.length
        return 0.0 if total.zero?

        shared = a.count { |item| b.include?(item) }
        similarity = 2.0 * shared / total
        1.0 / similarity - 1
      end

    end

  end

end