lib/regexp-examples/parser.rb in regexp-examples-0.0.0 vs lib/regexp-examples/parser.rb in regexp-examples-0.0.1
- old
+ new
@@ -8,21 +8,22 @@
end
# Parses regexp_string starting at @current_position and returns an array
# holding one parse_repeater result per group that was read.
# Diff note: the new side passes the array built so far into parse_group.
def parse
repeaters = []
while @current_position < regexp_string.length
- group = parse_group
+ group = parse_group(repeaters)
# A MultiGroupEnd stops the loop here; it is never added to repeaters.
break if group.is_a? MultiGroupEnd
# New side: the OrGroup was built from the array collected so far (see
# parse_or_group), so a fresh array is started and the OrGroup's entry,
# appended below, becomes its first element. Rebinding the local leaves the
# array already handed to parse_or_group unchanged.
+ repeaters = [] if group.is_a? OrGroup
# Advance one position, then store what parse_repeater returns for the group.
@current_position += 1
repeaters << parse_repeater(group)
end
repeaters
end
private
# Looks at the character at @current_position and delegates to the parse_*
# helper for that character; any character without its own `when` goes to
# parse_single_char_group. Returns the group object produced by the helper.
# Diff note: the new side adds a `repeaters` parameter, which is only
# forwarded to parse_or_group when the character is '|'.
- def parse_group
+ def parse_group(repeaters)
char = regexp_string[@current_position]
case char
when '('
group = parse_multi_group
when ')'
# (The body of the ')' branch lies in the gap between the two hunks and is
# not shown in this listing.)
@@ -30,28 +31,32 @@
when '['
group = parse_char_group
when '.'
group = parse_dot_group
when '|'
- group = parse_or_group
+ group = parse_or_group(repeaters)
when '\\'
group = parse_after_backslash_group
else
group = parse_single_char_group(char)
end
group
end
# Builds the group for whatever follows a backslash. Moves @current_position
# one step forward (onto the character after the "\") and then chooses:
#   * leading digits        -> parse_backreference_group with those digits
#   * a BackslashCharMap key (new side only) -> CharGroup of the mapped value
#   * anything else         -> parse_single_char_group with that character
# Returns the resulting group.
# Diff note: the old side matched the remaining string with `case ... when
# /regex/`; the new side uses a subject-less `case` so each branch can test a
# different expression.
def parse_after_backslash_group
@current_position += 1
- case regexp_string[@current_position..-1]
- when /^(\d+)/
+ case
+ when regexp_string[@current_position..-1] =~ /^(\d+)/
# $& is the text matched by the regexp test above, i.e. the run of digits.
group = parse_backreference_group($&)
+ when BackslashCharMap.keys.include?(regexp_string[@current_position])
+ group = CharGroup.new(
+ BackslashCharMap[regexp_string[@current_position]])
+ # TODO: There are also a bunch of multi-char matches to watch out for:
+ # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals
else
group = parse_single_char_group( regexp_string[@current_position] )
- # TODO: What about cases like \n, \(, \^, etc?
- # SpecialCharsAfterBackslash ?
+ # TODO: What about cases like \A, \z, \Z ?
end
group
end
def parse_repeater(group)
@@ -86,25 +91,35 @@
end
# Parses a "[...]" character set. Steps past the opening "[", collects the
# characters up to the terminating "]" into an array, and returns
# CharGroup.new(chars). @current_position is left on the terminating "]".
# Diff note: the old side stopped at the first "]" it met; the new side
# accepts a "]" directly after "[" as a literal and skips over "]" characters
# that are escaped by a backslash.
def parse_char_group
chars = []
@current_position += 1
- # TODO: What about the sneaky edge case of /...[]a-z].../ ?
- until regexp_string[@current_position].chr == ']'
- chars << regexp_string[@current_position].chr
+ if regexp_string[@current_position] == ']'
+ # Beware of the sneaky edge case:
+ # /[]]/ (match "]")
+ chars << ']'
@current_position += 1
end
# New side: the loop ends on a "]" only when the text before it does NOT end
# with a non-backslash character followed by an odd number of backslashes
# (the pattern below: optional backslash pairs, then one more backslash).
# Every character visited, backslashes included, is appended to chars.
+ until regexp_string[@current_position] == ']' \
+ && !regexp_string[0..@current_position-1].match(/[^\\](\\{2})*\\\z/)
+ # Beware of having an ODD number of "\" before the "]", e.g.
+ # /[\]]/ (match "]")
+ # /[\\]/ (match "\")
+ # /[\\\]]/ (match "\" or "]")
+ chars << regexp_string[@current_position]
+ @current_position += 1
+ end
CharGroup.new(chars)
end
# Builds the group for "." (see the '.' branch of parse_group): returns a new
# DotGroup. Takes no arguments and does not move @current_position.
def parse_dot_group
DotGroup.new
end
# Builds the group for "|". Steps past the "|" and calls parse again to
# collect everything it reads from that point, then returns an OrGroup.
# Diff note: the old side built the OrGroup from the right-hand result only;
# the new side also receives the repeaters gathered before the "|"
# (left_repeaters) and passes both sides to OrGroup.new.
- def parse_or_group
+ def parse_or_group(left_repeaters)
@current_position += 1
- repeaters = parse
- OrGroup.new(repeaters)
+ right_repeaters = parse
+ OrGroup.new(left_repeaters, right_repeaters)
end
def parse_single_char_group(char)
SingleCharGroup.new(char)