lib/regexp-examples/parser.rb in regexp-examples-1.0.1 vs lib/regexp-examples/parser.rb in regexp-examples-1.0.2

- old
+ new

@@ -44,48 +44,79 @@ when '|' group = parse_or_group(repeaters) when '\\' group = parse_after_backslash_group when '^' - if @current_position == 0 - group = PlaceHolderGroup.new # Ignore the "illegal" character - else - raise IllegalSyntaxError, "Anchors ('#{next_char}') cannot be supported, as they are not regular" - end + group = parse_caret when '$' - if @current_position == (regexp_string.length - 1) - group = PlaceHolderGroup.new # Ignore the "illegal" character - else - raise IllegalSyntaxError, "Anchors ('#{next_char}') cannot be supported, as they are not regular" - end + group = parse_dollar when /[#\s]/ - if @extended - parse_extended_whitespace - group = PlaceHolderGroup.new # Ignore the whitespace/comment - else - group = parse_single_char_group(next_char) - end + group = parse_extended_whitespace else group = parse_single_char_group(next_char) end group end + def parse_repeater(group) + case next_char + when '*' + repeater = parse_star_repeater(group) + when '+' + repeater = parse_plus_repeater(group) + when '?' + repeater = parse_question_mark_repeater(group) + when '{' + repeater = parse_range_repeater(group) + else + repeater = parse_one_time_repeater(group) + end + repeater + end + + def parse_caret + if @current_position == 0 + return PlaceHolderGroup.new # Ignore the "illegal" character + else + raise_anchors_exception! + end + end + + def parse_dollar + if @current_position == (regexp_string.length - 1) + return PlaceHolderGroup.new # Ignore the "illegal" character + else + raise_anchors_exception! + end + end + def parse_extended_whitespace + if @extended + skip_whitespace + group = PlaceHolderGroup.new # Ignore the whitespace/comment + else + group = parse_single_char_group(next_char) + end + group + end + + def skip_whitespace whitespace_chars = rest_of_string.match(/#.*|\s+/)[0] @current_position += whitespace_chars.length - 1 end def parse_after_backslash_group @current_position += 1 case when rest_of_string =~ /\A(\d{1,3})/ @current_position += ($1.length - 1) # In case of 10+ backrefs! group = parse_backreference_group($1) - when rest_of_string =~ /\Ak<([^>]+)>/ # Named capture group + when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group @current_position += ($1.length + 2) - group = parse_backreference_group($1) + # Check for RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/ + group_id = ($1.to_i < 0) ? (@num_groups + $1.to_i + 1) : $1 + group = parse_backreference_group(group_id) when BackslashCharMap.keys.include?(next_char) group = CharGroup.new( BackslashCharMap[next_char].dup, @ignorecase ) @@ -115,54 +146,36 @@ when next_char == 'R' # Linebreak group = CharGroup.new(["\r\n", "\n", "\v", "\f", "\r"], @ignorecase) # A bit hacky... when next_char == 'g' # Subexpression call raise IllegalSyntaxError, "Subexpression calls (\\g) cannot be supported, as they are not regular" when next_char =~ /[bB]/ # Anchors - raise IllegalSyntaxError, "Anchors ('\\#{next_char}') cannot be supported, as they are not regular" + raise_anchors_exception! when next_char =~ /[AG]/ # Start of string if @current_position == 1 group = PlaceHolderGroup.new else - raise IllegalSyntaxError, "Anchors ('\\#{next_char}') cannot be supported, as they are not regular" + raise_anchors_exception! end when next_char =~ /[zZ]/ # End of string if @current_position == (regexp_string.length - 1) group = PlaceHolderGroup.new else - raise IllegalSyntaxError, "Anchors ('\\#{next_char}') cannot be supported, as they are not regular" + raise_anchors_exception! end else group = parse_single_char_group( next_char ) end group end - def parse_repeater(group) - case next_char - when '*' - repeater = parse_star_repeater(group) - when '+' - repeater = parse_plus_repeater(group) - when '?' - repeater = parse_question_mark_repeater(group) - when '{' - repeater = parse_range_repeater(group) - else - repeater = parse_one_time_repeater(group) - end - repeater - end - def parse_multi_group @current_position += 1 @num_groups += 1 - group_id = nil # init - previous_ignorecase = @ignorecase - previous_multiline = @multiline - previous_extended = @extended - rest_of_string.match( - / + remember_old_regexp_options do + group_id = nil # init + rest_of_string.match( + / \A (\?)? # Is it a "special" group, i.e. starts with a "?"? ( : # Non capture group |! # Neglookahead @@ -173,43 +186,52 @@ ! # Neglookbehind |= # Lookbehind |[^>]+ # Named capture ) |[mix]*-?[mix]* # Option toggle - )? - /x - ) do |match| - case - when match[1].nil? # e.g. /(normal)/ - group_id = @num_groups.to_s - when match[2] == ':' # e.g. /(?:nocapture)/ - @current_position += 2 - when match[2] == '#' # e.g. /(?#comment)/ - comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0] - @current_position += comment_group.length - when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/ - regexp_options_toggle($1, $2) - @current_position += $&.length + 1 - if next_char == ':' # e.g. /(?i:subexpr)/ - @current_position += 1 - else - return PlaceHolderGroup.new + )? + /x + ) do |match| + case + when match[1].nil? # e.g. /(normal)/ + group_id = @num_groups.to_s + when match[2] == ':' # e.g. /(?:nocapture)/ + @current_position += 2 + when match[2] == '#' # e.g. /(?#comment)/ + comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0] + @current_position += comment_group.length + when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/ + regexp_options_toggle($1, $2) + @num_groups -= 1 # Toggle "groups" should not increase backref group count + @current_position += $&.length + 1 + if next_char == ':' # e.g. /(?i:subexpr)/ + @current_position += 1 + else + return PlaceHolderGroup.new + end + when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/ + raise IllegalSyntaxError, "Lookaheads are not regular; cannot generate examples" + when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/ + raise IllegalSyntaxError, "Lookbehinds are not regular; cannot generate examples" + else # e.g. /(?<name>namedgroup)/ + @current_position += (match[3].length + 3) + group_id = match[3] end - when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/ - raise IllegalSyntaxError, "Lookaheads are not regular; cannot generate examples" - when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/ - raise IllegalSyntaxError, "Lookbehinds are not regular; cannot generate examples" - else # e.g. /(?<name>namedgroup)/ - @current_position += (match[3].length + 3) - group_id = match[3] end + MultiGroup.new(parse, group_id) end - groups = parse + end + + def remember_old_regexp_options + previous_ignorecase = @ignorecase + previous_multiline = @multiline + previous_extended = @extended + group = yield @ignorecase = previous_ignorecase @multiline = previous_multiline @extended = previous_extended - MultiGroup.new(groups, group_id) + group end def regexp_options_toggle(on, off) @ignorecase = true if (on.include? "i") @ignorecase = false if (off.include? "i") @@ -244,12 +266,12 @@ def parse_single_char_group(char) SingleCharGroup.new(char, @ignorecase) end - def parse_backreference_group(match) - BackReferenceGroup.new(match) + def parse_backreference_group(group_id) + BackReferenceGroup.new(group_id) end def parse_control_character(char) (char.ord % 32).chr # Black magic! # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?" @@ -304,9 +326,13 @@ repeater = parse_question_mark_repeater(repeater) else parse_reluctant_or_possessive_repeater end repeater + end + + def raise_anchors_exception! + raise IllegalSyntaxError, "Anchors ('#{next_char}') cannot be supported, as they are not regular" end def parse_one_time_repeater(group) OneTimeRepeater.new(group) end