##############################################
# String Comparison Algorithms
##############################################

module BBLib

  # A simple rendition of the levenshtein distance algorithm
  def self.levenshtein_distance a, b
    costs = (0..b.length).to_a
    (1..a.length).each do |i|
      costs[0], nw = i, i - 1
      (1..b.length).each do |j|
        costs[j], nw = [costs[j] + 1, costs[j-1] + 1, a[i-1] == b[j-1] ? nw : nw + 1].min, costs[j]
      end
    end
    costs[b.length]
  end

  # Calculates a percentage based match using the levenshtein distance algorithm
  def self.levenshtein_similarity a, b
    distance = BBLib.levenshtein_distance a, b
    max = [a.length, b.length].max.to_f
    return ((max - distance.to_f) / max) * 100.0
  end

  # Calculates a percentage based match of two strings based on their character composition.
  def self.composition_similarity a, b
    if a.length <= b.length then t = a; a = b; b = t; end
    matches, temp = 0, b.dup
    a.chars.each do |c|
      if temp.chars.include? c
        matches+=1
        temp = temp.sub(c, '')
      end
    end
    (matches / [a.length, b.length].max.to_f )* 100.0
  end

  # Calculates a percentage based match between two strings based on the similarity of word matches.
  def self.phrase_similarity a, b
    temp = b.drop_symbols.split ' '
    matches = 0
    a.drop_symbols.split(' ').each do |w|
      if temp.include? w
        matches+=1
        temp.delete_at temp.find_index w
      end
    end
    (matches.to_f / [a.split(' ').size, b.split(' ').size].max.to_f) * 100.0
  end

  # Extracts all numbers from two strings and compares them and generates a percentage of match.
  # Percentage calculations here need to be weighted better...TODO
  def self.numeric_similarity a, b
    a, b = a.extract_numbers, b.extract_numbers
    return 100.0 if a.empty? && b.empty? || a == b
    matches = []
    for i in 0..[a.size, b.size].max-1
      matches << 1.0 / ([a[i].to_f, b[i].to_f].max - [a[i].to_f, b[i].to_f].min + 1.0)
    end
    (matches.inject{ |sum, m| sum + m } / matches.size.to_f) * 100.0
  end

  # A simple character distance calculator that uses qwerty key positions to determine how similar two strings are.
  # May be useful for typo detection.
  def self.qwerty_distance a, b
    a, b = a.downcase.strip, b.downcase.strip
    if a.length <= b.length then t = a; a = b; b = t; end
    qwerty = {
      1 => ['1','2','3','4','5','6','7','8','9','0'],
      2 => ['q','w','e','r','t','y','u','i','o','p'],
      3 => ['a','s','d','f','g','h','j','k','l'],
      4 => ['z','x','c','v','b','n','m']
    }
    count, offset = 0, 0
    a.chars.each do |c|
      if b.length <= count
        offset+=10
      else
        ai = qwerty.keys.find{ |f| qwerty[f].include? c }.to_i
        bi = qwerty.keys.find{ |f| qwerty[f].include? b.chars[count] }.to_i
        offset+= (ai - bi).abs
        offset+= (qwerty[ai].index(c) - qwerty[bi].index(b.chars[count])).abs
      end
      count+=1
    end
    offset
  end
end

class String
  def levenshtein_distance str
    BBLib.levenshtein_distance self, str
  end

  def levenshtein_similarity str
    BBLib.levenshtein_similarity self, str
  end

  def composition_similarity str
    BBLib.composition_similarity self, str
  end

  def phrase_similarity str
    BBLib.phrase_similarity self, str
  end

  def numeric_similarity str
    BBLib.numeric_similarity self, str
  end

  def qwerty_distance str
    BBLib.qwerty_distance self, str
  end
end