lib/regexp-examples/chargroup_parser.rb in regexp-examples-1.4.4 vs lib/regexp-examples/chargroup_parser.rb in regexp-examples-1.5.0
- old
+ new
@@ -1,6 +1,8 @@
require_relative 'parser_helpers/charset_negation_helper'
+require_relative 'parser_helpers/parse_group_helper'
+require_relative 'parser_helpers/parse_after_backslash_group_helper'
module RegexpExamples
# A "sub-parser", for char groups in a regular expression
# Some examples of what this class needs to parse:
# [abc] - plain characters
@@ -11,10 +13,12 @@
# [[:lower:]] - POSIX group
# [[a-f]&&[d-z]] - set intersection (should match "d", "e" or "f")
# [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n")
class ChargroupParser
include CharsetNegationHelper
+ include ParseGroupHelper
+ include ParseAfterBackslashGroupHelper
attr_reader :regexp_string, :current_position
alias length current_position
def initialize(regexp_string, is_sub_group: false)
@@ -35,11 +39,10 @@
parse_after_hyphen
when '&'
parse_after_ampersand
else
@charset.concat parse_checking_backlash
- @current_position += 1
end
end
@charset.uniq!
@current_position += 1 # To account for final "]"
@@ -77,19 +80,27 @@
# Parses a single element of the char group starting at the current position,
# handling a leading backslash escape.
#
# Returns an Array of characters. As of the "+" lines below, this method also
# advances @current_position past whatever it consumed on the non-escape path,
# instead of leaving that to the caller.
# NOTE(review): next_char is defined outside this hunk; presumably it returns
# the character at @current_position -- confirm.
def parse_checking_backlash
if next_char == '\\'
# Step over the backslash itself, then let parse_after_backslash interpret
# the escaped character.
@current_position += 1
parse_after_backslash
else
- [next_char]
# Capture the literal before moving the cursor, so that the value returned
# is the character that was just consumed.
+ r = [next_char]
+ @current_position += 1
+ r
end
end
# Interprets the character(s) following a backslash inside a char group and
# returns the resulting characters.
#
# Three cases are handled, in order:
# * "b"  -> the single string "\b" (in a Ruby double-quoted literal this is
#           the backspace character)
# * "u" followed by 4 hex digits, or by 1-4 hex digits in braces -> handed to
#   parse_backslash_unicode_sequence
# * anything else -> looked up in CharSets::BackslashCharMap, falling back to
#   the character itself when there is no entry
#
# In the "+" version every branch advances @current_position by one itself.
def parse_after_backslash
if next_char == 'b'
+ @current_position += 1
["\b"]
+ elsif rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
# Only the "u" is skipped here; the captured hex part (with or without
# braces) is passed on to the helper.
# NOTE(review): parse_backslash_unicode_sequence is not defined in this
# hunk (presumably it comes from one of the included helper modules, and
# presumably it advances past the hex digits) -- confirm.
+ @current_position += 1
+ parse_backslash_unicode_sequence(Regexp.last_match(1)).result.map(&:to_s)
else
- CharSets::BackslashCharMap.fetch(next_char, [next_char])
# Look the character up before advancing, so that the fetch sees the
# escaped character rather than the one after it.
+ char = CharSets::BackslashCharMap.fetch(next_char, [next_char])
+ @current_position += 1
+ char
end
end
def parse_sub_group_concat
@current_position += 1
@@ -115,16 +126,21 @@
@charset &= sub_group_parser.result
@current_position += (sub_group_parser.length - 1)
end
# Handles a "-" encountered inside a char group.
#
# * If the hyphen is immediately followed by "]", it is a literal hyphen and
#   is appended to @charset.
# * If it is followed by a \uXXXX or \u{X..XXXX} escape ("+" lines), the
#   charset is extended with the range from its last element up to that
#   unicode character.
# * Otherwise the charset is extended with the range from its last element up
#   to the next (possibly backslash-escaped) character.
#
# In the "+" version the shared trailing `@current_position += 1` is removed:
# each branch moves the cursor itself, and the value of the if-expression is
# returned via `r`.
# NOTE(review): @charset is presumably an Array, in which case `<<` and
# `concat` both return it and so `r` is @charset in every branch -- confirm.
def parse_after_hyphen
- if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
+ r = if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
+ @current_position += 1
@charset << '-'
+ elsif rest_of_string =~ /\A-\\u(\h{4}|\{\h{1,4}\})/
# Skip the three characters "-", "\" and "u"; the captured hex part is passed
# to the unicode helper.
# NOTE(review): parse_backslash_unicode_sequence is not visible in this hunk;
# presumably it consumes the hex digits -- confirm.
+ @current_position += 3
+ char = parse_backslash_unicode_sequence(Regexp.last_match(1)).result.first.to_s
+ @charset.concat((@charset.last..char).to_a)
else
# Skip the hyphen, then parse the range's end character. @charset.last (the
# range start) is evaluated before parse_checking_backlash runs.
@current_position += 1
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
end
- @current_position += 1
+ r
end
# Returns the portion of +regexp_string+ that starts at the current position
# and runs to the end of the string. Yields "" when the position equals the
# string length and nil when it lies beyond it.
def rest_of_string
  cursor = @current_position
  regexp_string.slice(cursor..-1)
end