# Copyright (c) 2020 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

module Contrast
  module Agent
    module Assess
      module Rule
        # A regexp is only vulnerable to REDOS if it's going to run
        # with pathologically bad performance.
        # We report a vulnerability if the regexp is liable to run
        # with quadratic time for some input.
        # This vastly errs on the side of false positives.
        class Redos < Contrast::Agent::Assess::Rule::Base
          class << self
            NAME = 'redos'

            # @return [String] the identifier of this rule ('redos')
            def name
              NAME
            end

            # Build a finding when a vulnerable-looking regexp is evaluated
            # against a string that the trigger node reports as violated.
            #
            # We can arrive here either from:
            #   regexp =~ string
            #   string =~ regexp
            #   regexp.match string
            # so object/args[0] can be string/regexp or regexp/string.
            #
            # @param context the current request context; passed through to
            #   build_finding untouched
            # @param trigger_node the trigger node for the invoked method; must
            #   respond to #violated?
            # @param source passed through to build_finding untouched
            # @param object [Regexp, String] the receiver of the invoked method
            # @param ret the return value of the invoked method; passed through
            #   to build_finding untouched
            # @param args [Array] the arguments of the invoked method; only
            #   args[0] is inspected here
            # @return the result of TriggerMethod.build_finding, or nil when
            #   either precondition below is not met
            def regexp_complexity_check context, trigger_node, source, object, ret, *args
              # Whichever side is not the receiver is taken from args[0].
              regexp = object.is_a?(Regexp) ? object : args[0]
              string = object.is_a?(String) ? object : args[0]

              # (1) regexp must be exploitable
              return unless regexp_vulnerable?(regexp)
              # (2) regexp must evaluate against user input
              return unless trigger_node.violated?(string)

              Contrast::Agent::Assess::Policy::TriggerMethod.build_finding(context, trigger_node, source, object, ret, *args)
            end

            protected

            # Two opening characters, later followed by a closing character plus
            # a multi-match character, later followed by another closing
            # character plus a multi-match character.
            # NOTE(review): this pattern does not itself look for an escaping
            # backslash before any of these characters, so escaped characters
            # are counted too -- confirm that over-matching is intended.
            VULNERABLE_PATTERN = /[\[(].*?[\[(].*?[\])][*+?].*?[\])][*+?]/.cs__freeze

            # Does the regexp look like it is vulnerable to REDOS?
            # https://bitbucket.org/contrastsecurity/assess-specifications/src/master/rules/dataflow/redos.md
            #
            # @param regexp [Regexp, Object] the candidate pattern; anything
            #   that does not respond to #source is reported as not vulnerable
            # @return [Boolean] true if the source of the regexp matches
            #   VULNERABLE_PATTERN
            def regexp_vulnerable? regexp
              # A pattern is considered vulnerable if it has 2 or more levels of nested multi-matching.
              # A level is defined as any set of opening and closing control characters
              # immediately followed by a multi match control character.
              # A control character is defined as one of the OPENING_CHARS, CLOSING_CHARS,
              # or MULTI_MATCH_CHARS that is not immediately preceded by an escaping \ character.
              # OPENING_CHARS are ( and [
              # CLOSING_CHARS are ) and ]
              # MULTI_MATCH_CHARS are +, *, and ?

              # The caller falls back to args[0] when the receiver is not a
              # Regexp, so we can be handed nil or some other object (e.g. from
              # `string =~ nil`). There is no pattern to inspect in that case;
              # report it as not vulnerable rather than raising NoMethodError.
              return false unless regexp.respond_to?(:source)

              # Nota bene about Regexp#to_s: it doesn't necessarily give you the original Regexp back
              # (in the sense of `my_str == Regexp.new(my_str).to_s`), it gives you a Regexp that
              # will have the same functional characteristics as the original.
              # Regexp#inspect gives you a "more nicely formatted" version than #to_s.
              # Regexp#source will give you the original source.

              # Use #match? because it doesn't fill out global variables
              # in the way match or =~ do.
              VULNERABLE_PATTERN.match? regexp.source
            end
          end
        end
      end
    end
  end
end