module RegexpExamples
  # A collection of related helper methods, utilised by the `Parser` class
  #
  # The including class is expected to provide `rest_of_string`, `next_char`
  # and `parse`, and to maintain the `@current_position`, `@num_groups`,
  # `@ignorecase`, `@multiline` and `@extended` instance variables.
  module ParseMultiGroupHelper
    protected

    # Parses the opening of a parenthesised group, starting at the "(".
    #
    # Inspects the characters following the "(" to work out which kind of
    # group this is, advances `@current_position` past the group prefix and
    # keeps `@num_groups` in step with the number of capture groups seen.
    #
    # @return [MultiGroup] for normal, non-capture, named and
    #   option-scoped (e.g. `(?i:...)`) groups, built from `parse`
    # @return [PlaceHolderGroup] for comment groups, absent operators and
    #   bare option toggles (e.g. `(?i)`)
    # @raise [IllegalSyntaxError] for lookaheads and lookbehinds
    def parse_multi_group
      # TODO: Clean up this ugly mess of a method!
      @current_position += 1
      @num_groups += 1
      # NOTE: every `return` inside this block leaves the method directly, so
      # the option restore in `remember_old_regexp_options` is skipped on
      # those paths. Presumably this is what lets a bare toggle such as
      # `(?i)` stay in effect afterwards -- confirm before adding an `ensure`.
      remember_old_regexp_options do
        group_id = nil # init
        rest_of_string.match(
          /
            \A
            (\?)?               # Is it a "special" group, i.e. starts with a "?"?
              (
                :               # Non capture group
                |!              # Neglookahead
                |=              # Lookahead
                |\#             # Comment group
                |~              # Absent operator (ruby 2.4.1+)
                |<              # Lookbehind or named capture
                (
                  !             # Neglookbehind
                  |=            # Lookbehind
                  |[^>]+        # Named capture
                )
                |[mix]*-?[mix]* # Option toggle
              )?
          /x
        ) do |match|
          if match[1].nil? # e.g. /(normal)/
            group_id = @num_groups.to_s
          elsif match[2] == ':' # e.g. /(?:nocapture)/
            @num_groups -= 1
            @current_position += 2
          elsif match[2] == '#' # e.g. /(?#comment)/
            @num_groups -= 1
            # Everything up to (but excluding) the first unescaped ")"
            comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*(?=\))/)[0]
            @current_position += comment_group.length
            return PlaceHolderGroup.new
          elsif match[2] == '~' # e.g. /(?~absent operator)/
            # The "best" way to replicate this is with a negative lookbehind:
            # e.g. (?~abc) --> (?:.(?<!abc))*
            # E.g. if the absent operator is a single character, then we can use:
            # (?~a) --> (?:[^a]*)
            # However (!!) this generalisation is not always possible:
            # (?~\wa|\Wb) --> ???
            # Therefore, the only 100% reliable option is just to match "nothing"
            @num_groups -= 1 # "Absence groups" are not counted as backrefs
            absence_group = rest_of_string.match(/.*?[^\\](?:\\{2})*(?=\))/)[0]
            @current_position += absence_group.length
            return PlaceHolderGroup.new
          elsif match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
            regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
            @num_groups -= 1 # Toggle "groups" should not increase backref group count
            # Skip the "?" plus the matched option characters
            @current_position += Regexp.last_match(0).length + 1
            if next_char == ':' # e.g. /(?i:subexpr)/
              @current_position += 1
            else
              return PlaceHolderGroup.new
            end
          elsif %w[! =].include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
            raise IllegalSyntaxError,
                  'Lookaheads are not regular; cannot generate examples'
          elsif %w[! =].include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
            raise IllegalSyntaxError,
                  'Lookbehinds are not regular; cannot generate examples'
          elsif match[3] # e.g. /(?<name>namedgroup)/
            # Skip "?", "<", the name itself and the closing ">"
            @current_position += (match[3].length + 3)
            group_id = match[3]
          end
        end
        MultiGroup.new(parse, group_id)
      end
    end

    # Yields, then resets the ignorecase/multiline/extended flags to the
    # values they had before the block ran.
    #
    # The flags are only reset when the block finishes normally; a `return`
    # or an exception raised inside the block bypasses the reset.
    #
    # @return [Object] whatever the block returned
    def remember_old_regexp_options
      previous_ignorecase = @ignorecase
      previous_multiline = @multiline
      previous_extended = @extended
      group = yield
      @ignorecase = previous_ignorecase
      @multiline = previous_multiline
      @extended = previous_extended
      group
    end

    # Applies an option toggle such as "i-mx" to the three option flags.
    #
    # @param on [String] option characters being switched on, e.g. "i"
    # @param off [String] option characters being switched off, e.g. "mx"
    def regexp_options_toggle(on, off)
      regexp_option_toggle(on, off, '@ignorecase', 'i')
      regexp_option_toggle(on, off, '@multiline', 'm')
      regexp_option_toggle(on, off, '@extended', 'x')
    end

    # Sets a single option flag according to the toggle strings. When `char`
    # appears in both strings, "off" wins because it is applied last.
    #
    # @param on [String] option characters being switched on
    # @param off [String] option characters being switched off
    # @param var [String] name of the instance variable, e.g. "@ignorecase"
    # @param char [String] the option character controlling `var`
    def regexp_option_toggle(on, off, var, char)
      instance_variable_set(var, true) if on.include? char
      instance_variable_set(var, false) if off.include? char
    end
  end
end