Sha256: 524f09c9e4085d19af4d2f3ba2e1d30e83313a38ae3fd14b9e547c7d5e652787

Contents?: true

Size: 1.83 KB

Versions: 1

Compression:

Stored size: 1.83 KB

Contents

# Jaro and Jaro-Winkler string similarity measures.
#
# Both methods are module functions, e.g.
#   JaroWinkler.jaro_winkler_distance('MARTHA', 'MARHTA')
module JaroWinkler
  module_function

  # Computes the Jaro similarity of two strings.
  #
  # Each character of +s1+ is searched for inside a sliding window of +s2+
  # centred on the same index. A character that occurs in the window counts as
  # a match; it additionally counts as a transposition when none of its
  # occurrences in the window lies to the right of the previously accepted
  # match position.
  #
  # @param s1 [String] first string
  # @param s2 [String] second string
  # @return [Float] 0.0 when either string is empty or nothing matches,
  #   otherwise the Jaro score computed from the match and transposition counts
  def jaro_distance(s1, s2)
    return 0.0 if s1.empty? || s2.empty?

    length1 = s1.length
    length2 = s2.length
    chars2 = s2.chars
    max_index = length2 - 1
    # Clamp at zero: for two one-character strings the raw formula gives -1,
    # which produced an empty window and a score of 0 even for equal strings.
    window_size = [([length1, length2].max / 2) - 1, 0].max
    matches = 0.0 # Float so the divisions in the final formula are not truncated.
    transpositions = 0
    previous_index = -1

    s1.each_char.with_index do |c1, i|
      left = [i - window_size, 0].max
      right = [i + window_size, max_index].min
      matched = false
      found = false
      # When s1 is much longer than s2 the window may start past the end of
      # s2; the range is then empty and the character is simply unmatched
      # (slicing s2 there returned nil and crashed).
      (left..right).each do |j|
        next unless chars2[j] == c1

        matched = true
        next if j <= previous_index

        previous_index = j
        found = true
        break # Nothing further in the window can change the outcome.
      end
      next unless matched

      matches += 1
      transpositions += 1 unless found
    end
    return 0.0 if matches.zero?

    # Don't divide transpositions by 2 since it's been counted directly by above code.
    1.0 / 3.0 * (matches / length1 + matches / length2 + (matches - transpositions) / matches)
  end

  # Computes the Jaro-Winkler similarity: the Jaro score boosted according to
  # the length of the common prefix (at most 4 characters).
  #
  # @param s1 [String] first string
  # @param s2 [String] second string
  # @param options [Hash] overrides for the defaults below
  # @option options [Numeric] :weight (0.1) scaling factor applied per matching
  #   prefix character; must not exceed 0.25
  # @option options [Numeric] :threshold (0.7) Jaro scores below this value are
  #   returned without the prefix boost
  # @option options [Boolean] :case_match (false) when truthy, both strings are
  #   downcased before being compared
  # @return [Float] the similarity score
  # @raise [RuntimeError] if the weight is greater than 0.25
  def jaro_winkler_distance(s1, s2, options = {})
    options = { weight: 0.1, threshold: 0.7, case_match: false }.merge(options)
    weight = options[:weight]
    threshold = options[:threshold]
    raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25

    if options[:case_match]
      s1 = s1.downcase
      s2 = s2.downcase
    end

    distance = jaro_distance(s1, s2)
    return distance if distance < threshold

    max_prefix = [4, s1.length, s2.length].min
    prefix = (0...max_prefix).take_while { |i| s1[i] == s2[i] }.size
    distance + ((prefix * weight) * (1 - distance))
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
jaro_winkler-1.0.1 lib/jaro_winkler.rb