lib/banktools-se/ocr.rb in banktools-se-2.6.2 vs lib/banktools-se/ocr.rb in banktools-se-2.6.3

- old
+ new

@@ -66,25 +66,28 @@ # max_length is 19 because that's the longest allowed integer by default in a Postgres integer column with Ruby on Rails. So attempting some queries with longer OCRs may cause exceptions. def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4, max_length: 19) # First, treat the input as one long string of digits. # E.g. "1234 and 5678" becomes "12345678". + digit_string = string.gsub(/\D/, "") - digit_string_length = digit_string.length - candidates = [] - # Then find all substrings ("n-grams") of min_length, and of all other lengths, up to max_length. # So e.g. find all four-digit substrings ("1234", "2345", …), all five-digit substrings and so on. + digit_string_length = digit_string.length + candidates = [] + 0.upto(digit_string.length - min_length) do |start_pos| min_end_pos = start_pos + min_length - 1 max_end_pos = [ start_pos + max_length, digit_string_length ].min - 1 min_end_pos.upto(max_end_pos) do |end_pos| candidates << digit_string.slice(start_pos..end_pos) end end + + # Get rid of any duplicates. candidates.uniq! # Finally, limit these substrings to ones that are actually valid OCRs.