# frozen_string_literal: true

module Zxcvbn
  module Scoring
    # on qwerty, 'g' has degree 6, being adjacent to 'ftyhbv'. '\' has degree 1.
    # this calculates the average over all keys.
    def self.calc_average_degree(graph)
      average = 0
      graph.each do |_key, neighbors|
        average += neighbors.count { |n| n }.to_f
      end
      average /= graph.keys.size.to_f
      average
    end

    BRUTEFORCE_CARDINALITY = 10

    MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10_000

    MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10

    MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50

    def self.nck(n, k) # rubocop:disable Naming/MethodParameterName
      # http://blog.plover.com/math/choose.html
      return 0.0 if k > n
      return 1.0 if k == 0

      r = 1.0
      (1..k).each do |d|
        r *= n
        r /= d
        n -= 1.0
      end
      r
    end

    def self.factorial(n) # rubocop:disable Naming/MethodParameterName
      # unoptimized, called only on small n
      return 1 if n < 2

      (2..n).reduce(&:*)
    end

    # ------------------------------------------------------------------------------
    # search --- most guessable match sequence -------------------------------------
    # ------------------------------------------------------------------------------

    # takes a sequence of overlapping matches, returns the non-overlapping sequence with
    # minimum guesses. the following is a O(l_max * (n + m)) dynamic programming algorithm
    # for a length-n password with m candidate matches. l_max is the maximum optimal
    # sequence length spanning each prefix of the password. In practice it rarely exceeds 5 and the
    # search terminates rapidly.

    # the optimal "minimum guesses" sequence is here defined to be the sequence that
    # minimizes the following function:

    #    g = l! * Product(m.guesses for m in sequence) + D^(l - 1)

    # where l is the length of the sequence.

    # the factorial term is the number of ways to order l patterns.

    # the D^(l-1) term is another length penalty, roughly capturing the idea that an
    # attacker will try lower-length sequences first before trying length-l sequences.

    # for example, consider a sequence that is date-repeat-dictionary.
    #  - an attacker would need to try other date-repeat-dictionary combinations,
    #    hence the product term.
    #  - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
    #    ..., hence the factorial term.
    #  - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
    #    sequences before length-3. assuming at minimum D guesses per pattern type,
    #    D^(l-1) approximates Sum(D^i for i in [1..l-1]

    # ------------------------------------------------------------------------------
    def self.most_guessable_match_sequence(password, matches, _exclude_additive: false)
      n = password.length
      # partition matches into sublists according to ending index j
      matches_by_j = (0...n).map { [] }
      matches.each do |m|
        matches_by_j[m["j"]] << m
      end

      # small detail: for deterministic output, sort each sublist by i.
      matches_by_j.each do |lst|
        lst.sort_by! { |m| m["i"] }
      end

      optimal = {
        # optimal.m[k][l] holds final match in the best length-l match sequence covering the
        # password prefix up to k, inclusive.
        # if there is no length-l sequence that scores better (fewer guesses) than
        # a shorter match sequence spanning the same prefix, optimal.m[k][l] is undefined.
        "m" => (0...n).map { {} },
        # same structure as optimal.m -- holds the product term Prod(m.guesses for m in sequence).
        # optimal.pi allows for fast (non-looping) updates to the minimization function.
        "pi" => (0...n).map { {} },
        # same structure as optimal.m -- holds the overall metric.
        "g" => (0...n).map { {} }
      }

      # helper: considers whether a length-l sequence ending at match m is better (fewer guesses)
      # than previously encountered sequences, updating state if so.
      update = lambda do |m, l|
        k = m["j"]
        pi = estimate_guesses(m, password)
        if l > 1
          # we're considering a length-l sequence ending with match m:
          # obtain the product term in the minimization function by multiplying m's guesses
          # by the product of the length-(l-1) sequence ending just before m, at m.i - 1.
          pi *= optimal["pi"][m["i"] - 1][l - 1]
        end
        # calculate the minimization func
        g = factorial(l) * pi
        g += MIN_GUESSES_BEFORE_GROWING_SEQUENCE**(l - 1) if !_exclude_additive
        # update state if new best.
        # first see if any competing sequences covering this prefix, with l or fewer matches,
        # fare better than this sequence. if so, skip it and return.
        optimal["g"][k].find do |competing_l, competing_g|
          next if competing_l > l
          return nil if competing_g <= g
        end
        # this sequence might be part of the final optimal sequence.
        optimal["g"][k][l] = g
        optimal["m"][k][l] = m
        optimal["pi"][k][l] = pi

        optimal["g"][k] = optimal["g"][k].sort.to_h
        optimal["m"][k] = optimal["m"][k].sort.to_h
        optimal["pi"][k] = optimal["pi"][k].sort.to_h
      end

      # helper: make bruteforce match objects spanning i to j, inclusive.
      make_bruteforce_match = lambda do |i, j|
        return {
          "pattern" => "bruteforce",
          "token" => password[i..j],
          "i" => i,
          "j" => j
        }
      end

      # helper: evaluate bruteforce matches ending at k.
      bruteforce_update = lambda do |k|
        # see if a single bruteforce match spanning the k-prefix is optimal.
        m = make_bruteforce_match.call(0, k)
        update.call(m, 1)
        (1..k).each do |i|
          # generate k bruteforce matches, spanning from (i=1, j=k) up to (i=k, j=k).
          # see if adding these new matches to any of the sequences in optimal[i-1]
          # leads to new bests.
          m = make_bruteforce_match.call(i, k)
          optimal["m"][i - 1].each do |l, last_m|
            # corner: an optimal sequence will never have two adjacent bruteforce matches.
            # it is strictly better to have a single bruteforce match spanning the same region:
            # same contribution to the guess product with a lower length.
            # --> safe to skip those cases.
            next if last_m["pattern"] == "bruteforce"

            # try adding m to this length-l sequence.
            update.call(m, l + 1)
          end
        end
      end

      # helper: step backwards through optimal.m starting at the end,
      # constructing the final optimal match sequence.
      unwind = lambda do |n2|
        optimal_match_sequence = []
        k = n2 - 1
        # find the final best sequence length and score
        l, _g = (optimal["g"][k] || []).min_by { |_candidate_l, candidate_g| candidate_g || 0 }
        while k >= 0
          m = optimal["m"][k][l]
          optimal_match_sequence.unshift(m)
          k = m["i"] - 1
          l -= 1
        end
        return optimal_match_sequence
      end

      (0...n).each do |k|
        matches_by_j[k].each do |m|
          if m["i"] > 0
            optimal["m"][m["i"] - 1].each_key do |l|
              update.call(m, l + 1)
            end
          else
            update.call(m, 1)
          end
        end
        bruteforce_update.call(k)
      end

      optimal_match_sequence = unwind.call(n)
      optimal_l = optimal_match_sequence.length

      # corner: empty password
      guesses = if password.empty?
        1
      else
        optimal["g"][n - 1][optimal_l]
      end

      # final result object
      {
        "password" => password,
        "guesses" => guesses,
        "guesses_log10" => Math.log10(guesses),
        "sequence" => optimal_match_sequence
      }
    end

    # ------------------------------------------------------------------------------
    # guess estimation -- one function per match pattern ---------------------------
    # ------------------------------------------------------------------------------
    def self.estimate_guesses(match, password)
      if match["guesses"]
        return match["guesses"] # a match's guess estimate doesn't change. cache it.
      end

      min_guesses = 1
      if match["token"].length < password.length
        min_guesses = if match["token"].length == 1
          MIN_SUBMATCH_GUESSES_SINGLE_CHAR
        else
          MIN_SUBMATCH_GUESSES_MULTI_CHAR
        end
      end
      estimation_functions = {
        "bruteforce" => method(:bruteforce_guesses),
        "dictionary" => method(:dictionary_guesses),
        "spatial" => method(:spatial_guesses),
        "repeat" => method(:repeat_guesses),
        "sequence" => method(:sequence_guesses),
        "regex" => method(:regex_guesses),
        "date" => method(:date_guesses)
      }
      guesses = estimation_functions[match["pattern"]].call(match)
      match["guesses"] = [guesses, min_guesses].max
      match["guesses_log10"] = Math.log10(match["guesses"])
      match["guesses"]
    end

    MAX_VALUE = 2**1024

    def self.bruteforce_guesses(match)
      guesses = BRUTEFORCE_CARDINALITY**match["token"].length
      # trying to match JS behaviour here setting a MAX_VALUE to try to acheieve same values as JS library.
      guesses = MAX_VALUE if guesses > MAX_VALUE

      # small detail: make bruteforce matches at minimum one guess bigger than smallest allowed
      # submatch guesses, such that non-bruteforce submatches over the same [i..j] take precedence.
      min_guesses = if match["token"].length == 1
        MIN_SUBMATCH_GUESSES_SINGLE_CHAR + 1
      else
        MIN_SUBMATCH_GUESSES_MULTI_CHAR + 1
      end

      [guesses, min_guesses].max.to_f
    end

    def self.repeat_guesses(match)
      match["base_guesses"] * match["repeat_count"]
    end

    def self.sequence_guesses(match)
      first_chr = match["token"][0]
      # lower guesses for obvious starting points
      base_guesses = if ["a", "A", "z", "Z", "0", "1", "9"].include?(first_chr)
        4
      elsif first_chr.match?(/\d/)
        10
      else
        # could give a higher base for uppercase,
        # assigning 26 to both upper and lower sequences is more conservative.
        26
      end
      if !match["ascending"]
        # need to try a descending sequence in addition to every ascending sequence ->
        # 2x guesses
        base_guesses *= 2
      end
      base_guesses * match["token"].length
    end

    MIN_YEAR_SPACE = 20
    REFERENCE_YEAR = Time.now.year

    def self.regex_guesses(match)
      char_class_bases = {
        "alpha_lower" => 26,
        "alpha_upper" => 26,
        "alpha" => 52,
        "alphanumeric" => 62,
        "digits" => 10,
        "symbols" => 33
      }
      if char_class_bases.key? match["regex_name"]
        char_class_bases[match["regex_name"]]**match["token"].length
      elsif match["regex_name"] == "recent_year"
        # conservative estimate of year space: num years from REFERENCE_YEAR.
        # if year is close to REFERENCE_YEAR, estimate a year space of MIN_YEAR_SPACE.
        year_space = (match["regex_match"][0].to_i - REFERENCE_YEAR).abs
        [year_space, MIN_YEAR_SPACE].max

      end
    end

    def self.date_guesses(match)
      # base guesses: (year distance from REFERENCE_YEAR) * num_days * num_years
      year_space = [(match["year"] - REFERENCE_YEAR).abs, MIN_YEAR_SPACE].max
      guesses = year_space * 365
      separator = match["separator"]
      if !["", nil].include?(separator)
        # add factor of 4 for separator selection (one of ~4 choices)
        guesses *= 4
      end
      guesses
    end

    KEYBOARD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS["qwerty"]).freeze
    # slightly different for keypad/mac keypad, but close enough
    KEYPAD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS["keypad"]).freeze

    KEYBOARD_STARTING_POSITIONS = ADJACENCY_GRAPHS["qwerty"].keys.size
    KEYPAD_STARTING_POSITIONS = ADJACENCY_GRAPHS["keypad"].keys.size

    def self.spatial_guesses(match)
      if ["qwerty", "dvorak"].include?(match["graph"])
        s = KEYBOARD_STARTING_POSITIONS
        d = KEYBOARD_AVERAGE_DEGREE
      else
        s = KEYPAD_STARTING_POSITIONS
        d = KEYPAD_AVERAGE_DEGREE
      end
      guesses = 0.0
      ll = match["token"].length
      t = match["turns"]
      # estimate the number of possible patterns w/ length ll or less with t turns or less.
      (2..ll).each do |i|
        possible_turns = [t, i - 1].min
        (1..possible_turns).each do |j|
          guesses += nck((i - 1).to_f, (j - 1).to_f) * s.to_f * (d.to_f**j.to_f)
        end
      end
      # add extra guesses for shifted keys. (% instead of 5, A instead of a.)
      # math is similar to extra guesses of l33t substitutions in dictionary matches.
      if match["shifted_count"] && match["shifted_count"] != 0
        ss = match["shifted_count"]
        uu = match["token"].length - match["shifted_count"] # unshifted count
        if ss == 0 || uu == 0
          guesses *= 2
        else
          shifted_variations = 0
          (1..[ss, uu].min).each do |i|
            shifted_variations += nck((ss + uu).to_f, i.to_f)
          end
          guesses *= shifted_variations
        end
      end
      guesses
    end

    def self.dictionary_guesses(match)
      match["base_guesses"] = match["rank"] # keep these as properties for display purposes
      match["uppercase_variations"] = uppercase_variations(match)
      match["l33t_variations"] = l33t_variations(match)
      reversed_variations = match["reversed"] && 2 || 1
      match["base_guesses"] * match["uppercase_variations"] * match["l33t_variations"] * reversed_variations
    end

    START_UPPER = /^[A-Z][^A-Z]+$/.freeze
    END_UPPER = /^[^A-Z]+[A-Z]$/.freeze
    ALL_UPPER = /^[^a-z]+$/.freeze
    ALL_LOWER = /^[^A-Z]+$/.freeze

    def self.uppercase_variations(match)
      word = match["token"]
      return 1 if word.match?(ALL_LOWER) || word.downcase == word

      # a capitalized word is the most common capitalization scheme,
      # so it only doubles the search space (uncapitalized + capitalized).
      # allcaps and end-capitalized are common enough too, underestimate as 2x factor to be safe.
      [START_UPPER, END_UPPER, ALL_UPPER].each do |regex|
        return 2 if word.match?(regex)
      end
      # otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters
      # with U uppercase letters or less. or, if there's more uppercase than lower (for eg. PASSwORD),
      # the number of ways to lowercase U+L letters with L lowercase letters or less.
      uu = word.chars.count { |chr| chr.match?(/[A-Z]/) }
      ll = word.chars.count { |chr| chr.match?(/[a-z]/) }
      variations = 0
      (1..[uu, ll].min).each do |i|
        variations += nck(uu + ll, i)
      end
      variations
    end

    def self.l33t_variations(match)
      return 1 if !match["l33t"]

      variations = 1
      match["sub"].each do |subbed, unsubbed|
        # lower-case match.token before calculating: capitalization shouldn't affect l33t calc.
        chrs = match["token"].downcase.chars
        ss = chrs.count { |chr| chr == subbed }
        uu = chrs.count { |chr| chr == unsubbed }
        if ss == 0 || uu == 0
          # for this sub, password is either fully subbed (444) or fully unsubbed (aaa)
          # treat that as doubling the space (attacker needs to try fully subbed chars in addition to
          # unsubbed.)
          variations *= 2
        else
          # this case is similar to capitalization:
          # with aa44a, uu = 3, ss = 2, attacker needs to try unsubbed + one sub + two subs
          p = [uu, ss].min
          possibilities = 0
          (1..p).each do |i|
            possibilities += nck(uu + ss, i)
          end
          variations *= possibilities
        end
      end
      variations
    end
  end
end