lib/regexp-examples/parser.rb in regexp-examples-0.2.4 vs lib/regexp-examples/parser.rb in regexp-examples-0.3.0

- old
+ new

@@ -34,10 +34,22 @@
         group = parse_dot_group
       when '|'
         group = parse_or_group(repeaters)
       when '\\'
         group = parse_after_backslash_group
+      when '^', 'A'
+        if @current_position == 0
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
+      when '$', 'z', 'Z'
+        if @current_position == (regexp_string.length - 1)
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
       else
         group = parse_single_char_group(char)
       end
       group
     end
@@ -53,19 +65,39 @@
         group = parse_backreference_group($1)
       when BackslashCharMap.keys.include?(regexp_string[@current_position])
         group = CharGroup.new(
           BackslashCharMap[regexp_string[@current_position]])
       when rest_of_string =~ /\A(c|C-)(.)/ # Control character
-        # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
         @current_position += $1.length
         group = parse_single_char_group( parse_control_character($2) )
       when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
         @current_position += $1.length
         group = parse_single_char_group( parse_escape_sequence($1) )
-      when rest_of_string =~ /\Au(\h{4})/ # Unicode sequence
-        @current_position += 4
-        group = parse_single_char_group( parse_unicode_sequence($1) )
+      when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
+        @current_position += $1.length
+        sequence = $1.match(/\h{1,4}/)[0] # Strip off "{" and "}"
+        group = parse_single_char_group( parse_unicode_sequence(sequence) )
+      when rest_of_string =~ /\Ap\{([^}]+)\}/ # Named properties
+        @current_position += ($1.length + 2)
+        raise UnsupportedSyntaxError, "Named properties ({\\p#{$1}}) are not yet supported"
+      when rest_of_string =~ /\Ag/ # Subexpression call
+        # TODO: Should this be IllegalSyntaxError ?
+        raise UnsupportedSyntaxError, "Subexpression calls (\g) are not yet supported"
+      when rest_of_string =~ /\A[GbB]/ # Anchors
+        raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+      when rest_of_string =~ /\AA/ # Start of string
+        if @current_position == 1
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
+      when rest_of_string =~ /\A[zZ]/ # End of string
+        if @current_position == (regexp_string.length - 1)
+          group = parse_single_char_group('') # Ignore the "illegal" character
+        else
+          raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+        end
       else
         group = parse_single_char_group( regexp_string[@current_position] )
         # TODO: What about cases like \A, \z, \Z ?
       end
       group
@@ -115,10 +147,13 @@
     def parse_multi_end_group
       MultiGroupEnd.new
     end

     def parse_char_group
+      if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
+        raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
+      end
       chars = []
       @current_position += 1
       if regexp_string[@current_position] == ']'
         # Beware of the sneaky edge case:
         # /[]]/ (match "]")
@@ -164,10 +199,10 @@
     def parse_escape_sequence(match)
       eval "?\\x#{match}"
     end

     def parse_unicode_sequence(match)
-      eval "?\\u#{match}"
+      eval "?\\u{#{match}}"
     end

     def parse_star_repeater(group)
       @current_position += 1
       StarRepeater.new(group)