Sha256: bd0dc03fa8abb36e80a2afb5b96d3b90e751f8d9bda038243d9979d10f7d697e
Contents?: true
Size: 1.85 KB
Versions: 2
Compression:
Stored size: 1.85 KB
Contents
module DidYouMean
  # Jaro similarity between two strings.
  #
  # Despite the method name, the returned value is a similarity score:
  # it is 1.0 for identical strings and 0 when no characters match.
  module Jaro
    module_function

    # Computes the Jaro similarity of +str1+ and +str2+.
    #
    # str1, str2 - Strings to compare (argument order does not matter; the
    #              shorter one is always scanned against the longer one).
    #
    # Returns a Float in (0, 1] when at least one character matches, or the
    # Integer 0 when there are no matching characters (including when either
    # string is empty).
    def distance(str1, str2)
      # Normalise so that str1 is never longer than str2.
      str1, str2 = str2, str1 if str1.length > str2.length
      length1, length2 = str1.length, str2.length

      m = 0.0 # number of matching characters
      t = 0.0 # number of matched characters that are out of order

      # Half-width of the window in which two characters may match.
      # For a longer string of length 0 or 1 the raw formula yields -1, which
      # would make the scan below skip every position (so "a" vs "a" scored 0);
      # clamp it so that same-position characters are always compared.
      range = length2 / 2 - 1
      range = 0 if range < 0

      # Integers used as bitsets: bit n is set once position n of the
      # corresponding string has been matched.
      flags1 = 0
      flags2 = 0

      # Avoid duplicating enumerable objects
      # Also, call #to_a since #codepoints returns an Enumerator on Ruby 1.9.3.
      str1_codepoints = str1.codepoints.to_a
      str2_codepoints = str2.codepoints.to_a

      # Pass 1: for each character of str1, find the first not-yet-matched
      # equal character of str2 inside the window around the same position.
      i = 0
      while i < length1
        last = i + range
        # Positions past the end of str2 can never match; do not scan them.
        last = length2 - 1 if last >= length2
        j = (i >= range) ? i - range : 0

        while j <= last
          if flags2[j] == 0 && str1_codepoints[i] == str2_codepoints[j]
            flags2 |= (1 << j)
            flags1 |= (1 << i)
            m += 1
            break
          end

          j += 1
        end

        i += 1
      end

      # Pass 2: walk the matched characters of both strings in order and count
      # the pairs that differ. k is the cursor into str2; because flags1 and
      # flags2 always have the same number of bits set, a matched position in
      # str2 exists for every matched position in str1.
      k = i = 0
      while i < length1
        if flags1[i] != 0
          k += 1 while k < length2 && flags2[k] == 0
          t += 1 if str1_codepoints[i] != str2_codepoints[k]
          k += 1
        end

        i += 1
      end
      # Each transposition was counted once from each side.
      t = (t / 2).floor

      m == 0 ? 0 : (m / length1 + m / length2 + (m - t) / m) / 3
    end
  end

  # Jaro-Winkler similarity: the Jaro score boosted for a shared prefix.
  module JaroWinkler
    # Scaling factor applied per matching prefix character.
    WEIGHT = 0.1
    # The prefix bonus is only applied when the Jaro score exceeds this value.
    THRESHOLD = 0.7

    module_function

    # Computes the Jaro-Winkler similarity of +str1+ and +str2+.
    #
    # str1, str2 - Strings to compare.
    #
    # Returns the plain Jaro score when it is not above THRESHOLD; otherwise
    # the Jaro score increased by WEIGHT for each of the (at most 4) leading
    # characters the two strings share, scaled by the remaining distance to 1.
    def distance(str1, str2)
      jaro_distance = Jaro.distance(str1, str2)

      if jaro_distance > THRESHOLD
        codepoints2 = str2.codepoints.to_a
        prefix_bonus = 0

        # Count the common prefix, stopping at the first difference or after
        # four characters, whichever comes first.
        i = 0
        str1.each_codepoint do |char1|
          break unless i < 4 && char1 == codepoints2[i]

          prefix_bonus += 1
          i += 1
        end

        jaro_distance + (prefix_bonus * WEIGHT * (1 - jaro_distance))
      else
        jaro_distance
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
did_you_mean-0.10.0-java | lib/did_you_mean/jaro_winkler.rb |
did_you_mean-0.10.0 | lib/did_you_mean/jaro_winkler.rb |