lib/regexp-examples/parser.rb in regexp-examples-0.4.0 vs lib/regexp-examples/parser.rb in regexp-examples-0.4.1

- old
+ new

@@ -26,12 +26,11 @@ end private def parse_group(repeaters) - char = regexp_string[@current_position] - case char + case next_char when '(' group = parse_multi_group when ')' group = parse_multi_end_group when '[' @@ -53,11 +52,11 @@ group = parse_single_char_group('') # Ignore the "illegal" character else raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular" end else - group = parse_single_char_group(char) + group = parse_single_char_group(next_char) end group end def parse_after_backslash_group @@ -67,15 +66,15 @@ @current_position += ($1.length - 1) # In case of 10+ backrefs! group = parse_backreference_group($1) when rest_of_string =~ /\Ak<([^>]+)>/ # Named capture group @current_position += ($1.length + 2) group = parse_backreference_group($1) - when BackslashCharMap.keys.include?(regexp_string[@current_position]) + when BackslashCharMap.keys.include?(next_char) group = CharGroup.new( # Note: The `.dup` is important, as it prevents modifying the constant, in # CharGroup#init_ranges (where the '-' is moved to the front) - BackslashCharMap[regexp_string[@current_position]].dup + BackslashCharMap[next_char].dup ) when rest_of_string =~ /\A(c|C-)(.)/ # Control character @current_position += $1.length group = parse_single_char_group( parse_control_character($2) ) when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence @@ -104,18 +103,17 @@ group = parse_single_char_group('') # Ignore the "illegal" character else raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular" end else - group = parse_single_char_group( regexp_string[@current_position] ) + group = parse_single_char_group( next_char ) end group end def parse_repeater(group) - char = regexp_string[@current_position] - case char + case next_char when '*' repeater = parse_star_repeater(group) when '+' repeater = parse_plus_repeater(group) when '?' @@ -160,23 +158,23 @@ if rest_of_string =~ /\A\[\[:[^:]+:\]\]/ raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented" end chars = [] @current_position += 1 - if regexp_string[@current_position] == ']' + if next_char == ']' # Beware of the sneaky edge case: # /[]]/ (match "]") chars << ']' @current_position += 1 end - until regexp_string[@current_position] == ']' \ + until next_char == ']' \ && !regexp_string[0..@current_position-1].match(/[^\\](\\{2})*\\\z/) # Beware of having an ODD number of "\" before the "]", e.g. # /[\]]/ (match "]") # /[\\]/ (match "\") # /[\\\]]/ (match "\" or "]") - chars << regexp_string[@current_position] + chars << next_char @current_position += 1 end CharGroup.new(chars) end @@ -212,18 +210,28 @@ eval "?\\u{#{match}}" end def parse_star_repeater(group) @current_position += 1 + parse_non_greedy_repeater StarRepeater.new(group) end def parse_plus_repeater(group) @current_position += 1 + parse_non_greedy_repeater PlusRepeater.new(group) end + def parse_non_greedy_repeater + if next_char == '?' + # TODO: Delay this warning until after parsing, and only display if capture groups are used + warn "Warning: Non-greedy operators (*? and +?) might not work properly, when using capture groups" + @current_position += 1 + end + end + def parse_question_mark_repeater(group) @current_position += 1 QuestionMarkRepeater.new(group) end @@ -240,9 +248,13 @@ OneTimeRepeater.new(group) end def rest_of_string regexp_string[@current_position..-1] + end + + def next_char + regexp_string[@current_position] end end end