lib/text/levenshtein.rb in text-1.3.0 vs lib/text/levenshtein.rb in text-1.3.1
- old
+ new
@@ -34,25 +34,30 @@
end
end
private
def distance_with_maximum(str1, str2, max_distance) # :nodoc:
- s, t = [str1, str2].sort_by(&:length).
- map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
+
n = s.length
m = t.length
big_int = n * m
- return m if n.zero?
- return n if m.zero?
- return 0 if s == t
+ # Swap if necessary so that s is always the shorter of the two strings
+ s, t, n, m = t, s, m, n if m < n
+
# If the length difference is already greater than the max_distance, then
# there is nothing else to check
if (n - m).abs >= max_distance
return max_distance
end
+ return 0 if s == t
+ return m if n.zero?
+ return n if m.zero?
+
# The values necessary for our threshold are written; the ones after must
# be filled with large integers since the tailing member of the threshold
# window in the bottom array will run min across them
d = (m + 1).times.map { |i|
if i < m || i < max_distance + 1
@@ -82,27 +87,29 @@
# See:
# Gusfield, Dan (1997). Algorithms on strings, trees, and sequences:
# computer science and computational biology.
# Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
# pp. 263–264.
- min = [0, i - max_distance - 1].max
- max = [m - 1, i + max_distance].min
+ min = i - max_distance - 1
+ min = 0 if min < 0
+ max = i + max_distance
+ max = m - 1 if max > m - 1
- (min .. max).each do |j|
+ min.upto(max) do |j|
# If the diagonal value is already greater than the max_distance
# then we can safety return: the diagonal will never go lower again.
# See: http://www.levenshtein.net/
if j == diag_index && d[j] >= max_distance
return max_distance
end
cost = s[i] == t[j] ? 0 : 1
- x = [
- d[j+1] + 1, # insertion
- e + 1, # deletion
- d[j] + cost # substitution
- ].min
+ insertion = d[j + 1] + 1
+ deletion = e + 1
+ substitution = d[j] + cost
+ x = insertion < deletion ? insertion : deletion
+ x = substitution if substitution < x
d[j] = e
e = x
end
d[m] = x
@@ -114,27 +121,31 @@
return x
end
end
def distance_without_maximum(str1, str2) # :nodoc:
- s, t = [str1, str2].map{ |str| str.encode(Encoding::UTF_8).unpack("U*") }
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
+
n = s.length
m = t.length
+
return m if n.zero?
return n if m.zero?
d = (0..m).to_a
x = nil
n.times do |i|
e = i + 1
m.times do |j|
- cost = (s[i] == t[j]) ? 0 : 1
- x = [
- d[j+1] + 1, # insertion
- e + 1, # deletion
- d[j] + cost # substitution
- ].min
+ cost = s[i] == t[j] ? 0 : 1
+ insertion = d[j + 1] + 1
+ deletion = e + 1
+ substitution = d[j] + cost
+ x = insertion < deletion ? insertion : deletion
+ x = substitution if substitution < x
+
d[j] = e
e = x
end
d[m] = x
end