lib/regexp-examples/parser.rb in regexp-examples-0.2.4 vs lib/regexp-examples/parser.rb in regexp-examples-0.3.0
- old
+ new
@@ -34,10 +34,22 @@
group = parse_dot_group
when '|'
group = parse_or_group(repeaters)
when '\\'
group = parse_after_backslash_group
+ when '^', 'A'
+ if @current_position == 0
+ group = parse_single_char_group('') # Ignore the "illegal" character
+ else
+ raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+ end
+ when '$', 'z', 'Z'
+ if @current_position == (regexp_string.length - 1)
+ group = parse_single_char_group('') # Ignore the "illegal" character
+ else
+ raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+ end
else
group = parse_single_char_group(char)
end
group
end
@@ -53,19 +65,39 @@
group = parse_backreference_group($1)
when BackslashCharMap.keys.include?(regexp_string[@current_position])
group = CharGroup.new(
BackslashCharMap[regexp_string[@current_position]])
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
- # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
@current_position += $1.length
group = parse_single_char_group( parse_control_character($2) )
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
@current_position += $1.length
group = parse_single_char_group( parse_escape_sequence($1) )
- when rest_of_string =~ /\Au(\h{4})/ # Unicode sequence
- @current_position += 4
- group = parse_single_char_group( parse_unicode_sequence($1) )
+ when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
+ @current_position += $1.length
+ sequence = $1.match(/\h{1,4}/)[0] # Strip off "{" and "}"
+ group = parse_single_char_group( parse_unicode_sequence(sequence) )
+ when rest_of_string =~ /\Ap\{([^}]+)\}/ # Named properties
+ @current_position += ($1.length + 2)
+ raise UnsupportedSyntaxError, "Named properties ({\\p#{$1}}) are not yet supported"
+ when rest_of_string =~ /\Ag/ # Subexpression call
+ # TODO: Should this be IllegalSyntaxError ?
+ raise UnsupportedSyntaxError, "Subexpression calls (\g) are not yet supported"
+ when rest_of_string =~ /\A[GbB]/ # Anchors
+ raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+ when rest_of_string =~ /\AA/ # Start of string
+ if @current_position == 1
+ group = parse_single_char_group('') # Ignore the "illegal" character
+ else
+ raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+ end
+ when rest_of_string =~ /\A[zZ]/ # End of string
+ if @current_position == (regexp_string.length - 1)
+ group = parse_single_char_group('') # Ignore the "illegal" character
+ else
+ raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
+ end
else
group = parse_single_char_group( regexp_string[@current_position] )
# TODO: What about cases like \A, \z, \Z ?
end
group
@@ -115,10 +147,13 @@
# Builds the group object that represents the end of a multi-group.
# Takes no arguments and, in this method itself, does not advance
# @current_position; it only returns a fresh MultiGroupEnd instance.
# NOTE(review): presumably invoked when the parser reaches a closing ")" -
# confirm against the caller.
def parse_multi_end_group
MultiGroupEnd.new
end
def parse_char_group
+ if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
+ raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
+ end
chars = []
@current_position += 1
if regexp_string[@current_position] == ']'
# Beware of the sneaky edge case:
# /[]]/ (match "]")
@@ -164,10 +199,10 @@
# Converts the hex digits of a "\xHH" escape into the character they denote.
#
# match - one or two hexadecimal digits (e.g. "41" or "a"), as captured by
#         the caller's /\Ax(\h{1,2})/ pattern.
#
# Returns the one-character String produced by the Ruby literal ?\x<match>.
# Raises ArgumentError if match is not one or two hex digits.
def parse_escape_sequence(match)
  # The digits are interpolated into source text handed to eval, so refuse
  # anything else up front instead of letting it run as arbitrary Ruby.
  unless match.to_s =~ /\A\h{1,2}\z/
    raise ArgumentError, "Invalid hex escape sequence: #{match.inspect}"
  end
  eval "?\\x#{match}"
end
# Converts the hex digits of a "\u" escape into the character they denote by
# evaluating a Ruby character literal built from `match`.
# The removed line interpolated the digits directly after "\u"; the added line
# wraps them in "\u{...}". The caller in this diff now matches either four
# digits or a braced run of one to four digits, and strips the braces before
# passing the digits here.
# NOTE(review): `match` is interpolated into eval'd source without any check
# in this method; it relies on the caller's regexp having limited it to hex
# digits - confirm no other call sites exist.
def parse_unicode_sequence(match)
- eval "?\\u#{match}"
+ eval "?\\u{#{match}}"
end
def parse_star_repeater(group)
@current_position += 1
StarRepeater.new(group)