# frozen_string_literal: true
##############################################
# String Comparison Algorithms
##############################################

module BBLib
  # Maps every character known to qwerty_distance to its [row, column] on the
  # keyboard. Rows are numbered 1 (digit row) through 4 (bottom letter row);
  # columns are 0-based positions within the row.
  QWERTY_KEY_POSITIONS = {
    1 => %w(1 2 3 4 5 6 7 8 9 0),
    2 => %w(q w e r t y u i o p),
    3 => %w(a s d f g h j k l),
    4 => %w(z x c v b n m)
  }.each_with_object({}) do |(row, keys), positions|
    keys.each_with_index { |key, column| positions[key] = [row, column].freeze }
  end.freeze

  # Offset added by qwerty_distance for a position that cannot be measured on
  # the keyboard: the shorter string has run out, or one of the two characters
  # is not in QWERTY_KEY_POSITIONS.
  QWERTY_UNMATCHED_PENALTY = 10

  private_constant :QWERTY_KEY_POSITIONS, :QWERTY_UNMATCHED_PENALTY

  # A simple rendition of the levenshtein distance algorithm.
  #
  # Only a single row of the edit-distance table is kept in memory. The inputs
  # only need to respond to +length+ and +[]+ with an integer index.
  #
  # @param a [String] first sequence
  # @param b [String] second sequence
  # @return [Integer] minimum number of single element insertions, deletions
  #   and substitutions needed to turn +a+ into +b+
  def self.levenshtein_distance(a, b)
    costs = (0..b.length).to_a
    (1..a.length).each do |i|
      costs[0] = i
      # Value of the cell diagonally up-left of the one being computed.
      diagonal = i - 1
      (1..b.length).each do |j|
        above = costs[j]
        substitution = a[i - 1] == b[j - 1] ? diagonal : diagonal + 1
        costs[j] = [above + 1, costs[j - 1] + 1, substitution].min
        diagonal = above
      end
    end
    costs[b.length]
  end

  # Calculates a percentage based match using the levenshtein distance algorithm.
  #
  # @param a [String]
  # @param b [String]
  # @return [Float] 100.0 for identical inputs (including two empty ones),
  #   0.0 when every position has to be edited
  def self.levenshtein_similarity(a, b)
    longest = [a.length, b.length].max.to_f
    # Two empty inputs are identical; without this guard the division is 0.0 / 0.0 (NaN).
    return 100.0 if longest.zero?
    distance = BBLib.levenshtein_distance(a, b)
    ((longest - distance.to_f) / longest) * 100.0
  end

  # Calculates a percentage based match of two strings based on their character
  # composition. Character order is ignored: the score is the number of
  # characters the two strings share (respecting how often each occurs)
  # relative to the length of the longer string.
  #
  # @param a [String]
  # @param b [String]
  # @return [Float] percentage between 0.0 and 100.0; 100.0 for two empty strings
  def self.composition_similarity(a, b)
    longest = [a.length, b.length].max
    # Avoid 0 / 0.0 (NaN) when both strings are empty.
    return 100.0 if longest.zero?

    available = Hash.new(0)
    b.each_char { |char| available[char] += 1 }

    matches = 0
    a.each_char do |char|
      next unless available[char] > 0
      # Consume the character so repeated letters are only matched as often
      # as they occur in both strings.
      available[char] -= 1
      matches += 1
    end
    (matches / longest.to_f) * 100.0
  end

  # Calculates a percentage based match between two strings based on the
  # similarity of word matches. Words are compared after +drop_symbols+ has
  # been applied and are split on whitespace; word order is ignored.
  #
  # NOTE(review): the denominator counts whitespace separated tokens of the raw
  # strings while matching uses the +drop_symbols+ output, so a token that
  # +drop_symbols+ removes entirely would presumably lower the score. Confirm
  # against String#drop_symbols before changing.
  #
  # @param a [String]
  # @param b [String]
  # @return [Float] percentage of matched words; 100.0 when neither string
  #   contains any word
  def self.phrase_similarity(a, b)
    word_total = [a.split(' ').size, b.split(' ').size].max
    # Neither side has any words; avoid 0.0 / 0 (NaN).
    return 100.0 if word_total.zero?

    remaining = b.drop_symbols.split(' ')
    matches = 0
    a.drop_symbols.split(' ').each do |word|
      position = remaining.index(word)
      next unless position
      # Each word of b may only be matched once.
      remaining.delete_at(position)
      matches += 1
    end
    (matches.to_f / word_total) * 100.0
  end

  # Extracts all numbers from two strings (via +extract_numbers+), compares
  # them position by position and generates a percentage of match. Each pair
  # scores 1 / (absolute difference + 1).
  #
  # A position that exists on one side only is compared against 0.0, because
  # the missing element is converted with +to_f+ (assumes +extract_numbers+
  # returns an Array - confirm against its definition).
  #
  # TODO: percentage calculations here need to be weighted better.
  #
  # @param a [String]
  # @param b [String]
  # @return [Float] average pair score as a percentage; 100.0 when both
  #   strings yield the same numbers or no numbers at all
  def self.numeric_similarity(a, b)
    a = a.extract_numbers
    b = b.extract_numbers
    return 100.0 if (a.empty? && b.empty?) || a == b

    scores = Array.new([a.size, b.size].max) do |i|
      1.0 / ((a[i].to_f - b[i].to_f).abs + 1.0)
    end
    (scores.inject(:+) / scores.size.to_f) * 100.0
  end

  # A simple character distance calculator that uses qwerty key positions to
  # determine how similar two strings are. May be useful for typo detection.
  #
  # Both strings are downcased and stripped, then compared position by
  # position. Two keys contribute the sum of their row and column differences.
  # A position contributes a fixed penalty of 10 when the shorter string has
  # no character there, or when the two characters differ and at least one of
  # them is not a digit or letter key. Identical characters contribute 0.
  #
  # @param a [String]
  # @param b [String]
  # @return [Integer] accumulated offset; 0 means the normalized strings are equal
  def self.qwerty_distance(a, b)
    longer = a.downcase.strip
    shorter = b.downcase.strip
    longer, shorter = shorter, longer if longer.length < shorter.length

    counterparts = shorter.chars
    offset = 0
    longer.each_char.with_index do |char, index|
      counterpart = counterparts[index]
      next if char == counterpart

      from = QWERTY_KEY_POSITIONS[char]
      to = QWERTY_KEY_POSITIONS[counterpart]
      offset += if from && to
                  (from[0] - to[0]).abs + (from[1] - to[1]).abs
                else
                  QWERTY_UNMATCHED_PENALTY
                end
    end
    offset
  end
end

# Convenience wrappers that expose the BBLib string comparison algorithms
# directly on String. In every method the receiver is passed as the first
# argument and +str+ as the second.
class String
  # Delegates to BBLib.levenshtein_distance with self and +str+.
  def levenshtein_distance(str)
    BBLib.levenshtein_distance(self, str)
  end

  # Delegates to BBLib.levenshtein_similarity with self and +str+.
  def levenshtein_similarity(str)
    BBLib.levenshtein_similarity(self, str)
  end

  # Delegates to BBLib.composition_similarity with self and +str+.
  def composition_similarity(str)
    BBLib.composition_similarity(self, str)
  end

  # Delegates to BBLib.phrase_similarity with self and +str+.
  def phrase_similarity(str)
    BBLib.phrase_similarity(self, str)
  end

  # Delegates to BBLib.numeric_similarity with self and +str+.
  def numeric_similarity(str)
    BBLib.numeric_similarity(self, str)
  end

  # Delegates to BBLib.qwerty_distance with self and +str+.
  def qwerty_distance(str)
    BBLib.qwerty_distance(self, str)
  end
end