Sha256: bd0dc03fa8abb36e80a2afb5b96d3b90e751f8d9bda038243d9979d10f7d697e

Contents?: true

Size: 1.85 KB

Versions: 2

Compression:

Stored size: 1.85 KB

Contents

module DidYouMean
  module Jaro
    module_function

    def distance(str1, str2)
      str1, str2 = str2, str1 if str1.length > str2.length
      length1, length2 = str1.length, str2.length

      m          = 0.0
      t          = 0.0
      range      = (length2 / 2).floor - 1
      flags1     = 0
      flags2     = 0

      # Avoid duplicating enumerable objects
      # Also, call #to_a since #codepoints returns an Enumerator on Ruby 1.9.3.
      str1_codepoints = str1.codepoints.to_a
      str2_codepoints = str2.codepoints.to_a

      i = 0
      while i < length1
        last = i + range
        j    = (i >= range) ? i - range : 0

        while j <= last
          if flags2[j] == 0 && str1_codepoints[i] == str2_codepoints[j]
            flags2 |= (1 << j)
            flags1 |= (1 << i)
            m += 1
            break
          end

          j += 1
        end

        i += 1
      end

      k = i = 0
      while i < length1
        if flags1[i] != 0
          j = index = k

          k = while j < length2
            index = j
            break(j + 1) if flags2[j] != 0

            j += 1
          end

          t += 1 if str1_codepoints[i] != str2_codepoints[index]
        end

        i += 1
      end
      t = (t / 2).floor

      m == 0 ? 0 : (m / length1 + m / length2 + (m - t) / m) / 3
    end
  end

  module JaroWinkler
    WEIGHT    = 0.1
    THRESHOLD = 0.7

    module_function

    def distance(str1, str2)
      jaro_distance = Jaro.distance(str1, str2)

      if jaro_distance > THRESHOLD
        codepoints2  = str2.codepoints.to_a
        prefix_bonus = 0

        i = 0
        str1.each_codepoint do |char1|
          char1 == codepoints2[i] && i < 4 ? prefix_bonus += 1 : break
          i += 1
        end

        jaro_distance + (prefix_bonus * WEIGHT * (1 - jaro_distance))
      else
        jaro_distance
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
did_you_mean-0.10.0-java lib/did_you_mean/jaro_winkler.rb
did_you_mean-0.10.0 lib/did_you_mean/jaro_winkler.rb