lib/regexp-examples/parser.rb in regexp-examples-0.0.0 vs lib/regexp-examples/parser.rb in regexp-examples-0.0.1

- old
+ new

@@ -8,21 +8,22 @@ end def parse repeaters = [] while @current_position < regexp_string.length - group = parse_group + group = parse_group(repeaters) break if group.is_a? MultiGroupEnd + repeaters = [] if group.is_a? OrGroup @current_position += 1 repeaters << parse_repeater(group) end repeaters end private - def parse_group + def parse_group(repeaters) char = regexp_string[@current_position] case char when '(' group = parse_multi_group when ')' @@ -30,28 +31,32 @@ when '[' group = parse_char_group when '.' group = parse_dot_group when '|' - group = parse_or_group + group = parse_or_group(repeaters) when '\\' group = parse_after_backslash_group else group = parse_single_char_group(char) end group end def parse_after_backslash_group @current_position += 1 - case regexp_string[@current_position..-1] - when /^(\d+)/ + case + when regexp_string[@current_position..-1] =~ /^(\d+)/ group = parse_backreference_group($&) + when BackslashCharMap.keys.include?(regexp_string[@current_position]) + group = CharGroup.new( + BackslashCharMap[regexp_string[@current_position]]) + # TODO: There are also a bunch of multi-char matches to watch out for: + # http://en.wikibooks.org/wiki/Ruby_Programming/Syntax/Literals else group = parse_single_char_group( regexp_string[@current_position] ) - # TODO: What about cases like \n, \(, \^, etc? - # SpecialCharsAfterBackslash ? + # TODO: What about cases like \A, \z, \Z ? end group end def parse_repeater(group) @@ -86,25 +91,35 @@ end def parse_char_group chars = [] @current_position += 1 - # TODO: What about the sneaky edge case of /...[]a-z].../ ? - until regexp_string[@current_position].chr == ']' - chars << regexp_string[@current_position].chr + if regexp_string[@current_position] == ']' + # Beware of the sneaky edge case: + # /[]]/ (match "]") + chars << ']' @current_position += 1 end + until regexp_string[@current_position] == ']' \ + && !regexp_string[0..@current_position-1].match(/[^\\](\\{2})*\\\z/) + # Beware of having an ODD number of "\" before the "]", e.g. + # /[\]]/ (match "]") + # /[\\]/ (match "\") + # /[\\\]]/ (match "\" or "]") + chars << regexp_string[@current_position] + @current_position += 1 + end CharGroup.new(chars) end def parse_dot_group DotGroup.new end - def parse_or_group + def parse_or_group(left_repeaters) @current_position += 1 - repeaters = parse - OrGroup.new(repeaters) + right_repeaters = parse + OrGroup.new(left_repeaters, right_repeaters) end def parse_single_char_group(char) SingleCharGroup.new(char)