lib/regexp-examples/chargroup_parser.rb in regexp-examples-1.1.3 vs lib/regexp-examples/chargroup_parser.rb in regexp-examples-1.1.4

- old
+ new

@@ -1,28 +1,33 @@ +require_relative 'parser_helpers/charset_negation_helper' + module RegexpExamples # A "sub-parser", for char groups in a regular expression # Some examples of what this class needs to parse: # [abc] - plain characters # [a-z] - ranges # [\n\b\d] - escaped characters (which may represent character sets) # [^abc] - negated group # [[a][bc]] - sub-groups (should match "a", "b" or "c") # [[:lower:]] - POSIX group - # [[a-f]&&[d-z]] - set intersection (should match "d", "f" or "f") + # [[a-f]&&[d-z]] - set intersection (should match "d", "e" or "f") # [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n") class ChargroupParser - attr_reader :regexp_string + include CharsetNegationHelper + + attr_reader :regexp_string, :current_position + alias_method :length, :current_position + def initialize(regexp_string, is_sub_group: false) @regexp_string = regexp_string @is_sub_group = is_sub_group @current_position = 0 - parse + @charset = [] + @negative = false end def parse - @charset = [] - @negative = false parse_first_chars until next_char == ']' case next_char when '[' parse_sub_group_concat @@ -38,16 +43,12 @@ @charset.uniq! @current_position += 1 # To account for final "]" end - def length - @current_position - end - def result - @negative ? (CharSets::Any - @charset) : @charset + negate_if(@charset, @negative) end private def parse_first_chars @@ -64,16 +65,11 @@ parse_posix_group(Regexp.last_match(1), Regexp.last_match(2)) if @is_sub_group end end def parse_posix_group(negation_flag, name) - chars = if negation_flag.empty? - POSIXCharMap[name] - else - CharSets::Any - POSIXCharMap[name] - end - @charset.concat chars + @charset.concat negate_if(POSIXCharMap[name], !negation_flag.empty?) @current_position += (negation_flag.length + # 0 or 1, if '^' is present name.length + 2) # Length of opening and closing colons (always 2) end @@ -99,10 +95,11 @@ end def parse_sub_group_concat @current_position += 1 sub_group_parser = self.class.new(rest_of_string, is_sub_group: true) + sub_group_parser.parse @charset.concat sub_group_parser.result @current_position += sub_group_parser.length end def parse_after_ampersand @@ -115,20 +112,21 @@ end def parse_sub_group_intersect @current_position += 2 sub_group_parser = self.class.new(rest_of_string, is_sub_group: true) + sub_group_parser.parse @charset &= sub_group_parser.result @current_position += (sub_group_parser.length - 1) end def parse_after_hyphen if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range! @charset << '-' @current_position += 1 else @current_position += 1 - @charset.concat (@charset.last..parse_checking_backlash.first).to_a + @charset.concat((@charset.last..parse_checking_backlash.first).to_a) @current_position += 1 end end def rest_of_string