# frozen_string_literal: true

module Zxcvbn
  module Matching
    def self.build_ranked_dict(ordered_list)
      result = {}
      # rank starts at 1, not 0
      ordered_list.each_with_index do |word, idx|
        result[word] = idx + 1
      end
      result
    end

    RANKED_DICTIONARIES = FREQUENCY_LISTS.transform_values do |lst|
      build_ranked_dict(lst)
    end

    GRAPHS = {
      "qwerty" => ADJACENCY_GRAPHS["qwerty"],
      "dvorak" => ADJACENCY_GRAPHS["dvorak"],
      "keypad" => ADJACENCY_GRAPHS["keypad"],
      "mac_keypad" => ADJACENCY_GRAPHS["mac_keypad"]
    }.freeze

    L33T_TABLE = {
      "a" => ["4", "@"],
      "b" => ["8"],
      "c" => ["(", "{", "[", "<"],
      "e" => ["3"],
      "g" => ["6", "9"],
      "i" => ["1", "!", "|"],
      "l" => ["1", "|", "7"],
      "o" => ["0"],
      "s" => ["$", "5"],
      "t" => ["+", "7"],
      "x" => ["%"],
      "z" => ["2"]
    }.freeze

    REGEXEN = {
      # alpha_lower: /[a-z]/,
      # recent_year: /19\d\d|200\d|201\d/g
      "recent_year" => /19\d\d|200\d|201\d/
    }.freeze

    DATE_MAX_YEAR = 2050

    DATE_MIN_YEAR = 1000

    DATE_SPLITS = {
      4 => [ # for length-4 strings, eg 1191 or 9111, two ways to split:
        [
          1,
          2 # 1 1 91 (2nd split starts at index 1, 3rd at index 2)
        ],
        [
          2,
          3 # 91 1 1
        ]
      ],
      5 => [
        [
          1,
          3 # 1 11 91
        ],
        [
          2,
          3 # 11 1 91
        ]
      ],
      6 => [
        [
          1,
          2 # 1 1 1991
        ],
        [
          2,
          4 # 11 11 91
        ],
        [
          4,
          5 # 1991 1 1
        ]
      ],
      7 => [
        [
          1,
          3 # 1 11 1991
        ],
        [
          2,
          3 # 11 1 1991
        ],
        [
          4,
          5 # 1991 1 11
        ],
        [
          4,
          6 # 1991 11 1
        ]
      ],
      8 => [
        [
          2,
          4 # 11 11 1991
        ],
        [
          4,
          6 # 1991 11 11
        ]
      ]
    }.freeze

    def self.translate(string, chr_map)
      string.chars.map { |chr| chr_map[chr] || chr }.join
    end

    def self.sorted(matches)
      # sort on i primary, j secondary
      matches.sort_by! { |match| [match["i"], match["j"]] }
    end

    # ------------------------------------------------------------------------------
    # omnimatch -- combine everything ----------------------------------------------
    # ------------------------------------------------------------------------------
    def self.omnimatch(password)
      matches = []
      matchers = [
        :dictionary_match,
        :reverse_dictionary_match,
        :l33t_match,
        :spatial_match,
        :repeat_match,
        :sequence_match,
        :regex_match,
        :date_match
      ]
      matchers.each do |matcher|
        matches += send(matcher, password)
      end
      sorted(matches)
    end

    #-------------------------------------------------------------------------------
    # dictionary match (common passwords, english, last names, etc) ----------------
    #-------------------------------------------------------------------------------
    def self.dictionary_match(password, _ranked_dictionaries = RANKED_DICTIONARIES)
      # _ranked_dictionaries variable is for unit testing purposes
      matches = []
      len = password.length
      password_lower = password.downcase
      _ranked_dictionaries.each do |dictionary_name, ranked_dict|
        (0...len).each do |i|
          (i...len).each do |j|
            if ranked_dict.key?(password_lower[i..j])
              word = password_lower[i..j]
              rank = ranked_dict[word]
              matches << {
                "pattern" => "dictionary",
                "i" => i,
                "j" => j,
                "token" => password[i..j],
                "matched_word" => word,
                "rank" => rank,
                "dictionary_name" => dictionary_name,
                "reversed" => false,
                "l33t" => false
              }
            end
          end
        end
      end
      sorted(matches)
    end

    def self.reverse_dictionary_match(password, _ranked_dictionaries = RANKED_DICTIONARIES)
      reversed_password = password.reverse
      matches = dictionary_match(reversed_password, _ranked_dictionaries)
      matches.each do |match|
        match["token"] = match["token"].reverse
        match["reversed"] = true
        # map coordinates back to original string
        match["i"], match["j"] = [password.length - 1 - match["j"], password.length - 1 - match["i"]]
      end
      sorted(matches)
    end

    def self.user_input_dictionary=(ordered_list)
      RANKED_DICTIONARIES["user_inputs"] = build_ranked_dict(ordered_list.dup)
    end

    #-------------------------------------------------------------------------------
    # dictionary match with common l33t substitutions ------------------------------
    #-------------------------------------------------------------------------------
    # makes a pruned copy of l33t_table that only includes password's possible substitutions
    def self.relevant_l33t_subtable(password, table)
      password_chars = {}
      password.chars.each do |chr|
        password_chars[chr] = true
      end
      subtable = {}
      table.each do |letter, subs|
        relevant_subs = []
        subs.each do |sub|
          relevant_subs << sub if password_chars[sub]
        end
        subtable[letter] = relevant_subs if !relevant_subs.empty?
      end
      subtable
    end

    # returns the list of possible 1337 replacement dictionaries for a given password
    def self.enumerate_l33t_subs(table)
      dedup = lambda do |subs|
        deduped = []
        members = {}
        subs.each do |sub|
          assoc = sub.map { |k, v| [k, v] }
          assoc.sort!
          label = assoc.map { |k, v| "#{k},#{v}" }.join("-")

          if !members.key?(label)
            members[label] = true
            deduped << sub
          end
        end
        return deduped
      end

      subs = [[]]

      helper = lambda do |keys|
        return if keys.empty?

        first_key = keys[0]
        rest_keys = keys[1..-1]
        next_subs = []
        table[first_key].each do |l33t_chr|
          subs.each do |sub|
            dup_l33t_index = -1
            (0...sub.length).each do |i|
              if sub[i][0] == l33t_chr
                dup_l33t_index = i
                break
              end
            end
            if dup_l33t_index == -1
              sub_extension = sub + [[l33t_chr, first_key]]
              next_subs << sub_extension
            else
              sub_alternative = sub.dup
              sub_alternative.delete_at(dup_l33t_index)
              sub_alternative << [l33t_chr, first_key]
              next_subs << sub
              next_subs << sub_alternative
            end
          end
        end
        subs = dedup.call(next_subs)
        helper.call(rest_keys)
      end

      keys = table.keys
      helper.call(keys)

      sub_dicts = [] # convert from assoc lists to dicts
      subs.each do |sub|
        sub_dict = {}
        sub.each do |(l33t_chr, chr)|
          sub_dict[l33t_chr] = chr
        end
        sub_dicts << sub_dict
      end

      sub_dicts
    end

    def self.l33t_match(password, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE)
      matches = []
      enumerate_l33t_subs(relevant_l33t_subtable(password, _l33t_table)).each do |sub|
        break if sub.empty? # corner case: password has no relevant subs.

        subbed_password = translate(password, sub)
        dictionary_match(subbed_password, _ranked_dictionaries).each do |match|
          token = password[match["i"]..match["j"]]
          if token.downcase == match["matched_word"]
            next # only return the matches that contain an actual substitution
          end

          match_sub = {} # subset of mappings in sub that are in use for this match
          sub.each do |subbed_chr, chr|
            match_sub[subbed_chr] = chr if token.index(subbed_chr)
          end
          match["l33t"] = true
          match["token"] = token
          match["sub"] = match_sub
          match["sub_display"] = match_sub.map { |k, v| "#{k} -> #{v}" }.join(", ")
          matches << match
        end
      end
      sorted(matches.select do |match|
        # filter single-character l33t matches to reduce noise.
        # otherwise '1' matches 'i', '4' matches 'a', both very common English words
        # with low dictionary rank.
        match["token"].length > 1
      end)
    end

    # ------------------------------------------------------------------------------
    # spatial match (qwerty/dvorak/keypad) -----------------------------------------
    # ------------------------------------------------------------------------------
    def self.spatial_match(password, _graphs = GRAPHS)
      matches = []
      _graphs.each do |graph_name, graph|
        matches += spatial_match_helper(password, graph, graph_name)
      end
      sorted(matches)
    end

    SHIFTED_RX = /[~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?]/.freeze

    def self.spatial_match_helper(password, graph, graph_name)
      matches = []
      i = 0
      while i < password.length - 1
        j = i + 1
        last_direction = nil
        turns = 0
        shifted_count = if ["qwerty", "dvorak"].include?(graph_name) && SHIFTED_RX.match?(password[i])
          # initial character is shifted
          1
        else
          0
        end
        loop do
          prev_char = password[j - 1]
          found = false
          found_direction = -1
          cur_direction = -1
          adjacents = graph[prev_char] || []
          # consider growing pattern by one character if j hasn't gone over the edge.
          if j < password.length
            cur_char = password[j]
            adjacents.each do |adj|
              cur_direction += 1
              if adj&.index(cur_char)
                found = true
                found_direction = cur_direction
                if adj.index(cur_char) == 1
                  # index 1 in the adjacency means the key is shifted,
                  # 0 means unshifted: A vs a, % vs 5, etc.
                  # for example, 'q' is adjacent to the entry '2@'.
                  # @ is shifted w/ index 1, 2 is unshifted.
                  shifted_count += 1
                end
                if last_direction != found_direction
                  # adding a turn is correct even in the initial case when last_direction is null:
                  # every spatial pattern starts with a turn.
                  turns += 1
                  last_direction = found_direction
                end
                break
              end
            end
          end
          # if the current pattern continued, extend j and try to grow again
          if found
            j += 1
          else
            # otherwise push the pattern discovered so far, if any...
            if j - i > 2 # don't consider length 1 or 2 chains.
              matches << {
                "pattern" => "spatial",
                "i" => i,
                "j" => j - 1,
                "token" => password[i...j],
                "graph" => graph_name,
                "turns" => turns,
                "shifted_count" => shifted_count
              }
            end
            # ...and then start a new search for the rest of the password.
            i = j
            break
          end
        end
      end
      matches
    end

    #-------------------------------------------------------------------------------
    # repeats (aaa, abcabcabc) and sequences (abcdef) ------------------------------
    #-------------------------------------------------------------------------------
    def self.repeat_match(password)
      matches = []
      greedy = /(.+)\1+/
      lazy = /(.+?)\1+/
      lazy_anchored = /^(.+?)\1+$/
      last_index = 0
      while last_index < password.length
        # greedy_last_index = lazy_last_index = last_index
        greedy_match = greedy.match(password, last_index)
        lazy_match = lazy.match(password, last_index)
        break if !greedy_match

        # coverage ???
        if greedy_match[0].length > lazy_match[0].length
          # greedy beats lazy for 'aabaab'
          #   greedy: [aabaab, aab]
          #   lazy:   [aa,     a]
          match = greedy_match
          # greedy's repeated string might itself be repeated, eg.
          # aabaab in aabaabaabaab.
          # run an anchored lazy match on greedy's repeated string
          # to find the shortest repeated string
          base_token = lazy_anchored.match(match[0])[1]
        else
          # lazy beats greedy for 'aaaaa'
          #   greedy: [aaaa,  aa]
          #   lazy:   [aaaaa, a]
          match = lazy_match
          base_token = match[1]
        end
        i = match.begin(0)
        j = match.end(0) - 1
        # recursively match and score the base string
        base_analysis = Scoring.most_guessable_match_sequence(base_token, omnimatch(base_token))
        base_matches = base_analysis["sequence"]
        base_guesses = base_analysis["guesses"]
        matches << {
          "pattern" => "repeat",
          "i" => i,
          "j" => j,
          "token" => match[0],
          "base_token" => base_token,
          "base_guesses" => base_guesses,
          "base_matches" => base_matches,
          "repeat_count" => match[0].length / base_token.length
        }
        last_index = j + 1
      end
      matches
    end

    MAX_DELTA = 5

    def self.sequence_match(password)
      # Identifies sequences by looking for repeated differences in unicode codepoint.
      # this allows skipping, such as 9753, and also matches some extended unicode sequences
      # such as Greek and Cyrillic alphabets.

      # for example, consider the input 'abcdb975zy'

      # password: a   b   c   d   b    9   7   5   z   y
      # index:    0   1   2   3   4    5   6   7   8   9
      # delta:      1   1   1  -2  -41  -2  -2  69   1

      # expected result:
      # [(i, j, delta), ...] = [(0, 3, 1), (5, 7, -2), (8, 9, 1)]
      return [] if password.length == 1

      result = []

      update = lambda do |i, j, delta|
        delta ||= 0
        if (j - i > 1 || delta.abs == 1) && (delta.abs > 0 && delta.abs <= MAX_DELTA)
          token = password[i..j]
          case token
          when /^[a-z]+$/
            sequence_name = "lower"
            sequence_space = 26
          when /^[A-Z]+$/
            sequence_name = "upper"
            sequence_space = 26
          when /^\d+$/
            sequence_name = "digits"
            sequence_space = 10
          else
            # conservatively stick with roman alphabet size.
            # (this could be improved)
            sequence_name = "unicode"
            sequence_space = 26
          end
          return result << {
            "pattern" => "sequence",
            "i" => i,
            "j" => j,
            "token" => password[i..j],
            "sequence_name" => sequence_name,
            "sequence_space" => sequence_space,
            "ascending" => delta > 0
          }
        end
      end

      result = []
      i = 0
      last_delta = nil

      (1...password.length).each do |k|
        delta = password[k].ord - password[k - 1].ord
        last_delta ||= delta
        next if delta == last_delta

        j = k - 1
        update.call(i, j, last_delta)
        i = j
        last_delta = delta
      end
      update.call(i, password.length - 1, last_delta)
      result
    end

    #-------------------------------------------------------------------------------
    # regex matching ---------------------------------------------------------------
    #-------------------------------------------------------------------------------
    def self.regex_match(password, _regexen = REGEXEN)
      matches = []
      _regexen.each do |name, regex|
        # regex.lastIndex = 0; # keeps regex_match stateless
        match_index = 0
        while (rx_match = regex.match(password, match_index))
          token = rx_match[0]
          matches << {
            "pattern" => "regex",
            "token" => token,
            "i" => rx_match.begin(0),
            "j" => rx_match.end(0) - 1,
            "regex_name" => name,
            "regex_match" => rx_match.to_a
          }
          match_index = rx_match.begin(0) + 1
        end
      end
      sorted(matches)
    end

    #-------------------------------------------------------------------------------
    # date matching ----------------------------------------------------------------
    #-------------------------------------------------------------------------------
    def self.date_match(password)
      # a "date" is recognized as:
      #   any 3-tuple that starts or ends with a 2- or 4-digit year,
      #   with 2 or 0 separator chars (1.1.91 or 1191),
      #   maybe zero-padded (01-01-91 vs 1-1-91),
      #   a month between 1 and 12,
      #   a day between 1 and 31.

      # NOTE: this isn't true date parsing in that "feb 31st" is allowed,
      # this doesn't check for leap years, etc.

      # recipe:
      # start with regex to find maybe-dates, then attempt to map the integers
      # onto month-day-year to filter the maybe-dates into dates.
      # finally, remove matches that are substrings of other matches to reduce noise.

      # NOTE: instead of using a lazy or greedy regex to find many dates over the full string,
      # this uses a ^...$ regex against every substring of the password -- less performant but leads
      # to every possible date match.
      matches = []
      maybe_date_no_separator = /^\d{4,8}$/

      # maybe_date_with_separator = %r{
      #   ^
      #   ( \d{1,4} )    # day, month, year
      #   ( [\s/\\_.-] ) # separator
      #   ( \d{1,2} )    # day, month
      #   \2             # same separator
      #   ( \d{1,4} )    # day, month, year
      #   $
      # }
      maybe_date_with_separator = %r{^(\d{1,4})([\s/\\_.-])(\d{1,2})\2(\d{1,4})$}

      (0..(password.length - 4)).each do |i|
        (i + 3..i + 7).each do |j|
          break if j >= password.length

          token = password[i..j]
          next if !maybe_date_no_separator.match(token)

          candidates = []
          DATE_SPLITS[token.length].each do |(k, l)|
            dmy = map_ints_to_dmy([token[0...k].to_i, token[k...l].to_i, token[l..-1].to_i])
            candidates << dmy if dmy
          end

          next if candidates.empty?

          # at this point: different possible dmy mappings for the same i,j substring.
          # match the candidate date that likely takes the fewest guesses: a year closest to 2000.
          # (scoring.REFERENCE_YEAR).

          # ie, considering '111504', prefer 11-15-04 to 1-1-1504
          # (interpreting '04' as 2004)
          best_candidate = candidates.min_by { |candidate| (candidate["year"] - Scoring::REFERENCE_YEAR).abs }
          matches << {
            "pattern" => "date",
            "token" => token,
            "i" => i,
            "j" => j,
            "separator" => "",
            "year" => best_candidate["year"],
            "month" => best_candidate["month"],
            "day" => best_candidate["day"]
          }
        end
      end
      # dates with separators are between length 6 '1/1/91' and 10 '11/11/1991'
      (0..password.length - 6).each do |i|
        (i + 5..i + 9).each do |j|
          break if j >= password.length

          token = password[i..j]
          rx_match = maybe_date_with_separator.match(token)
          next if !rx_match

          dmy = map_ints_to_dmy([rx_match[1].to_i, rx_match[3].to_i, rx_match[4].to_i])
          next if !dmy

          matches << {
            "pattern" => "date",
            "token" => token,
            "i" => i,
            "j" => j,
            "separator" => rx_match[2],
            "year" => dmy["year"],
            "month" => dmy["month"],
            "day" => dmy["day"]
          }
        end
      end
      # matches now contains all valid date strings in a way that is tricky to capture
      # with regexes only. while thorough, it will contain some unintuitive noise:

      # '2015_06_04', in addition to matching 2015_06_04, will also contain
      # 5(!) other date matches: 15_06_04, 5_06_04, ..., even 2015 (matched as 5/1/2020)

      # to reduce noise, remove date matches that are strict substrings of others
      sorted(matches.uniq.reject do |match|
        matches.find do |other_match|
          (match["i"] > other_match["i"] && match["j"] <= other_match["j"]) ||
          (match["i"] >= other_match["i"] && match["j"] < other_match["j"])
        end
      end)
    end

    def self.map_ints_to_dmy(ints)
      # given a 3-tuple, discard if:
      #   middle int is over 31 (for all dmy formats, years are never allowed in the middle)
      #   middle int is zero
      #   any int is over the max allowable year
      #   any int is over two digits but under the min allowable year
      #   2 ints are over 31, the max allowable day
      #   2 ints are zero
      #   all ints are over 12, the max allowable month
      return if ints[1] > 31 || ints[1] <= 0

      over_12 = 0
      over_31 = 0
      under_1 = 0
      ints.each do |int|
        return nil if (int > 99 && int < DATE_MIN_YEAR) || int > DATE_MAX_YEAR

        over_31 += 1 if int > 31
        over_12 += 1 if int > 12
        under_1 += 1 if int <= 0
      end
      return if over_31 >= 2 || over_12 == 3 || under_1 >= 2

      possible_year_splits = [
        [ints[2], ints[0..1]], # year last
        [ints[0], ints[1..2]] # year first
      ]
      possible_year_splits.each do |(y, rest)|
        if DATE_MIN_YEAR <= y && y <= DATE_MAX_YEAR
          dm = map_ints_to_dm(rest)

          # for a candidate that includes a four-digit year,
          # when the remaining ints don't match to a day and month,
          # it is not a date.
          return nil if !dm

          return {
            "year" => y,
            "month" => dm["month"],
            "day" => dm["day"]
          }
        end
      end

      # given no four-digit year, two digit years are the most flexible int to match, so
      # try to parse a day-month out of ints[0..1] or ints[1..0]
      possible_year_splits.each do |(y, rest)| # rubocop:disable Style/CombinableLoops
        dm = map_ints_to_dm(rest)
        if dm
          y = two_to_four_digit_year(y)
          return {
            "year" => y,
            "month" => dm["month"],
            "day" => dm["day"]
          }
        end
      end
      nil
    end

    def self.map_ints_to_dm(ints)
      [ints, ints.reverse].each do |(d, m)|
        if (d >= 1 && d <= 31) && (m >= 1 && m <= 12)
          return {
            "day" => d,
            "month" => m
          }
        end
      end
      nil
    end

    def self.two_to_four_digit_year(year)
      if year > 99
        year
      elsif year > 50
        # 87 -> 1987
        year + 1900
      else
        # 15 -> 2015
        year + 2000
      end
    end
  end
end