lib/text/levenshtein.rb in text-1.3.0 vs lib/text/levenshtein.rb in text-1.3.1

- old
+ new

@@ -34,25 +34,30 @@ end end private def distance_with_maximum(str1, str2, max_distance) # :nodoc: - s, t = [str1, str2].sort_by(&:length). - map{ |str| str.encode(Encoding::UTF_8).unpack("U*") } + s = str1.encode(Encoding::UTF_8).unpack("U*") + t = str2.encode(Encoding::UTF_8).unpack("U*") + n = s.length m = t.length big_int = n * m - return m if n.zero? - return n if m.zero? - return 0 if s == t + # Swap if necessary so that s is always the shorter of the two strings + s, t, n, m = t, s, m, n if m < n + # If the length difference is already greater than the max_distance, then # there is nothing else to check if (n - m).abs >= max_distance return max_distance end + return 0 if s == t + return m if n.zero? + return n if m.zero? + # The values necessary for our threshold are written; the ones after must # be filled with large integers since the tailing member of the threshold # window in the bottom array will run min across them d = (m + 1).times.map { |i| if i < m || i < max_distance + 1 @@ -82,27 +87,29 @@ # See: # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences: # computer science and computational biology. # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8. # pp. 263–264. - min = [0, i - max_distance - 1].max - max = [m - 1, i + max_distance].min + min = i - max_distance - 1 + min = 0 if min < 0 + max = i + max_distance + max = m - 1 if max > m - 1 - (min .. max).each do |j| + min.upto(max) do |j| # If the diagonal value is already greater than the max_distance # then we can safety return: the diagonal will never go lower again. # See: http://www.levenshtein.net/ if j == diag_index && d[j] >= max_distance return max_distance end cost = s[i] == t[j] ? 0 : 1 - x = [ - d[j+1] + 1, # insertion - e + 1, # deletion - d[j] + cost # substitution - ].min + insertion = d[j + 1] + 1 + deletion = e + 1 + substitution = d[j] + cost + x = insertion < deletion ? insertion : deletion + x = substitution if substitution < x d[j] = e e = x end d[m] = x @@ -114,27 +121,31 @@ return x end end def distance_without_maximum(str1, str2) # :nodoc: - s, t = [str1, str2].map{ |str| str.encode(Encoding::UTF_8).unpack("U*") } + s = str1.encode(Encoding::UTF_8).unpack("U*") + t = str2.encode(Encoding::UTF_8).unpack("U*") + n = s.length m = t.length + return m if n.zero? return n if m.zero? d = (0..m).to_a x = nil n.times do |i| e = i + 1 m.times do |j| - cost = (s[i] == t[j]) ? 0 : 1 - x = [ - d[j+1] + 1, # insertion - e + 1, # deletion - d[j] + cost # substitution - ].min + cost = s[i] == t[j] ? 0 : 1 + insertion = d[j + 1] + 1 + deletion = e + 1 + substitution = d[j] + cost + x = insertion < deletion ? insertion : deletion + x = substitution if substitution < x + d[j] = e e = x end d[m] = x end