lib/banktools-se/ocr.rb in banktools-se-2.6.2 vs lib/banktools-se/ocr.rb in banktools-se-2.6.3
- old
+ new
@@ -66,25 +66,28 @@
# max_length is 19 because that's the longest allowed integer by default in a Postgres integer column with Ruby on Rails. So attempting some queries with longer OCRs may cause exceptions.
def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4, max_length: 19)
# First, treat the input as one long string of digits.
# E.g. "1234 and 5678" becomes "12345678".
+
digit_string = string.gsub(/\D/, "")
- digit_string_length = digit_string.length
- candidates = []
-
# Then find all substrings ("n-grams") of min_length, and of all other lengths, up to max_length.
# So e.g. find all four-digit substrings ("1234", "2345", …), all five-digit substrings and so on.
+ digit_string_length = digit_string.length
+ candidates = []
+
0.upto(digit_string.length - min_length) do |start_pos|
min_end_pos = start_pos + min_length - 1
max_end_pos = [ start_pos + max_length, digit_string_length ].min - 1
min_end_pos.upto(max_end_pos) do |end_pos|
candidates << digit_string.slice(start_pos..end_pos)
end
end
+
+ # Get rid of any duplicates.
candidates.uniq!
# Finally, limit these substrings to ones that are actually valid OCRs.