parser.rb in regexp-examples-0.3.0

- old
+ new

@@ -34,10 +34,22 @@
         group = parse_dot_group
       when '|'
         group = parse_or_group(repeaters)
       when '\\'
         group = parse_after_backslash_group
+      when '^', 'A'
+        if @current_position == 0
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
+      when '$', 'z', 'Z'
+        if @current_position == (regexp_string.length - 1)
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
       else
         group = parse_single_char_group(char)
       end
       group
     end
@@ -53,19 +65,39 @@
         group = parse_backreference_group($1)
       when BackslashCharMap.keys.include?(regexp_string[@current_position])
         group = CharGroup.new(
           BackslashCharMap[regexp_string[@current_position]])
       when rest_of_string =~ /\A(c|C-)(.)/ # Control character
-        # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
         @current_position += $1.length
         group = parse_single_char_group( parse_control_character($2) )
       when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
         @current_position += $1.length
         group = parse_single_char_group( parse_escape_sequence($1) )
-      when rest_of_string =~ /\Au(\h{4})/ # Unicode sequence
-        @current_position += 4
-        group = parse_single_char_group( parse_unicode_sequence($1) )
+      when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
+        @current_position += $1.length
+        sequence = $1.match(/\h{1,4}/)[0] # Strip off "{" and "}"
+        group = parse_single_char_group( parse_unicode_sequence(sequence) )
+      when rest_of_string =~ /\Ap\{([^}]+)\}/ # Named properties
+        @current_position += ($1.length + 2)
+        raise UnsupportedSyntaxError, "Named properties ({\\p#{$1}}) are not yet supported"
+      when rest_of_string =~ /\Ag/ # Subexpression call
+        # TODO: Should this be IllegalSyntaxError ?
+        raise UnsupportedSyntaxError, "Subexpression calls (\g) are not yet supported"
+      when rest_of_string =~ /\A[GbB]/ # Anchors
+        raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+      when rest_of_string =~ /\AA/ # Start of string
+        if @current_position == 1
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
+      when rest_of_string =~ /\A[zZ]/ # End of string
+        if @current_position == (regexp_string.length - 1)
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
       else
         group = parse_single_char_group( regexp_string[@current_position] )
         # TODO: What about cases like \A, \z, \Z ?
       end
       group
@@ -115,10 +147,13 @@
     # Creates a new MultiGroupEnd instance and returns it.
     #
     # Takes no arguments and does not read or advance @current_position.
     #
     # @return [MultiGroupEnd] a freshly constructed object on every call
     def parse_multi_end_group
       end_marker = MultiGroupEnd.new
       end_marker
     end
 
     def parse_char_group
+      if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
+        raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
+      end
       chars = []
       @current_position += 1
       if regexp_string[@current_position] == ']'
         # Beware of the sneaky edge case:
         # /[]]/ (match "]")
@@ -164,10 +199,10 @@
     # Turns the hex digits of a `\x` escape into the character they denote.
     #
     # The digits are spliced into a `?\x..` character literal, and that
     # literal is then evaluated as Ruby source.
     #
     # NOTE(review): `match` is eval-ed verbatim. This presumably relies on the
     # caller only ever passing one or two hex digits -- confirm before calling
     # it with anything else.
     #
     # @param match [String] the hex digits that followed `\x`
     # @return [String] the result of evaluating the character literal
     def parse_escape_sequence(match)
       char_literal = "?\\x#{match}"
       eval(char_literal)
     end
 
     # Converts the hex digits of a `\u` escape into a character by splicing
     # them into a character literal and evaluating it as Ruby source.
     #
     # This hunk replaces the unbraced literal form (`?\uXXXX`) with the braced
     # form (`?\u{...}`).
     # NOTE(review): presumably the braced form is needed because the unbraced
     # one only accepts exactly four hex digits, whereas the braces allow
     # shorter sequences -- confirm against the Ruby literal syntax docs.
     #
     # NOTE(review): `match` is eval-ed verbatim; this presumably relies on the
     # caller only ever passing hex digits -- confirm.
     #
     # @param match [String] the hex digits of the escape, without braces
     # @return [String] the result of evaluating the character literal
     def parse_unicode_sequence(match)
-      eval "?\\u#{match}"
+      eval "?\\u{#{match}}"
     end
 
     def parse_star_repeater(group)
       @current_position += 1
       StarRepeater.new(group)