parser.rb in regexp-examples-0.0.1

- old
+ new

@@ -8,21 +8,22 @@
     end
 
     def parse
       repeaters = []
       while @current_position < regexp_string.length
-        group = parse_group
+        group = parse_group(repeaters) # a '|' takes these as its left side
         break if group.is_a? MultiGroupEnd
+        repeaters = [] if group.is_a? OrGroup # those went to the OrGroup
         @current_position += 1
         repeaters << parse_repeater(group)
       end
       repeaters
     end
 
     private
 
-    def parse_group
+    def parse_group(repeaters)
       char = regexp_string[@current_position]
       case char
       when '('
         group = parse_multi_group
       when ')'
@@ -30,28 +31,32 @@
       when '['
         group = parse_char_group
       when '.'
         group = parse_dot_group
       when '|'
-        group = parse_or_group
+        group = parse_or_group(repeaters)
       when '\\'
         group = parse_after_backslash_group
       else
         group = parse_single_char_group(char)
       end
       group
     end
 
     def parse_after_backslash_group
       @current_position += 1
-      case regexp_string[@current_position..-1]
-      when /^(\d+)/
+      case
+      when regexp_string[@current_position..-1] =~ /^(\d+)/ # e.g. \1 or \12
         group = parse_backreference_group($&)
+      when BackslashCharMap.keys.include?(regexp_string[@current_position])
+        group = CharGroup.new(
+          BackslashCharMap[regexp_string[@current_position]])
+        # TODO: Multi-character escape sequences are not handled yet; see
+        # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
       else
         group = parse_single_char_group( regexp_string[@current_position] )
-        # TODO: What about cases like \n, \(, \^, etc?
-        # SpecialCharsAfterBackslash ?
+        # TODO: Handle anchors such as \A, \z and \Z (not special-cased yet).
       end
       group
     end
 
     def parse_repeater(group)
@@ -86,25 +91,35 @@
     end
 
     def parse_char_group
       chars = []
       @current_position += 1
-      # TODO: What about the sneaky edge case of /...[]a-z].../ ?
-      until regexp_string[@current_position].chr == ']'
-        chars << regexp_string[@current_position].chr
+      if regexp_string[@current_position] == ']'
+        # Edge case: a "]" directly after the opening "[" is a literal
+        # member of the set, e.g. /[]]/ (match "]")
+        chars << ']'
         @current_position += 1
       end
+      until regexp_string[@current_position] == ']' \
+        && !regexp_string[0..@current_position-1].match(/[^\\](\\{2})*\\\z/)
+        # A "]" preceded by an ODD number of "\" is escaped, so it is
+        # collected as a member instead of closing the group, e.g.
+        # /[\]]/ (match "]"), /[\\]/ (match "\"),
+        # /[\\\]]/ (match "\" or "]")
+        chars << regexp_string[@current_position]
+        @current_position += 1
+      end
       CharGroup.new(chars)
     end
 
     def parse_dot_group
       DotGroup.new
     end
 
-    def parse_or_group
+    def parse_or_group(left_repeaters) # what was parsed before the '|'
       @current_position += 1
-      repeaters = parse
-      OrGroup.new(repeaters)
+      right_repeaters = parse # recursively parse what follows the '|'
+      OrGroup.new(left_repeaters, right_repeaters)
     end
 
 
     def parse_single_char_group(char)
       SingleCharGroup.new(char)