pattern.rb in goodcheck-2.4.0

- old
+ new
@@ -160,63 +160,121 @@
 
       @@TYPES[:email] = -> (name) {
         /\b(?<#{name}>#{AUTO_EMAIL_RE})/
       }
 
-      def self.regexp_for_type(name:, type:)
-        ty = type || :word
-        if @@TYPES.key?(ty)
-          @@TYPES[ty][name]
+      def self.expand(prefix, suffix, depth: 5)
+        if depth == 0
+          [
+            /[^#{suffix}]*/
+          ]
+        else
+          expandeds = expand(prefix, suffix, depth: depth - 1)
+          [/[^#{prefix}#{suffix}]*#{prefix}#{expandeds.first}#{suffix}[^#{prefix}#{suffix}]*/] + expandeds
         end
       end
 
+      def self.regexp_for_type(name:, type:, scanner:)
+        prefix = scanner.pre_match[-1]
+        suffix = scanner.check(WORD_RE) || scanner.peek(1)
+
+        case
+        when type == :__
+          body = case
+                 when prefix == "{" && suffix == "}"
+                   ::Regexp.union(expand(prefix, suffix))
+                 when prefix == "(" && suffix == ")"
+                   ::Regexp.union(expand(prefix, suffix))
+                 when prefix == "[" && suffix == "]"
+                   ::Regexp.union(expand(prefix, suffix))
+                 when prefix == "<" && suffix == ">"
+                   ::Regexp.union(expand(prefix, suffix))
+                 else
+                   unless suffix.empty?
+                     /(?~#{::Regexp.escape(suffix)})/
+                   else
+                     /.*/
+                   end
+                 end
+          /(?<#{name}>#{body})/
+
+        when @@TYPES.key?(type)
+          @@TYPES[type][name]
+        end
+      end
+
+      WORD_RE = /\w+|[\p{L}&&\p{^ASCII}]+/
+
       def self.compile_tokens(source, variables, case_sensitive:)
         tokens = []
         s = StringScanner.new(source)
 
         until s.eos?
           case
           when s.scan(/\${(?<name>[a-zA-Z_]\w*)(?::(?<type>#{::Regexp.union(*@@TYPES.keys.map(&:to_s))}))?}/)
             name = s[:name].to_sym
-            type = s[:type] && s[:type].to_sym
+            type = s[:type] ? s[:type].to_sym : :__
 
             if variables.key?(name)
+              if !s[:type] && s.pre_match == ""
+                Goodcheck.logger.error "Variable binding ${#{name}} at the beginning of pattern would cause an unexpected match"
+              end
+              if !s[:type] && s.peek(1) == ""
+                Goodcheck.logger.error "Variable binding ${#{name}} at the end of pattern would cause an unexpected match"
+              end
+
+              tokens << :nobr
               variables[name].type = type
-              regexp = regexp_for_type(name: name, type: type).to_s
+              regexp = regexp_for_type(name: name, type: type, scanner: s).to_s
               if tokens.empty? && (type == :word || type == :identifier)
                 regexp = /\b#{regexp.to_s}/
               end
               tokens << regexp.to_s
+              tokens << :nobr
             else
               tokens << ::Regexp.escape("${")
               tokens << ::Regexp.escape(name.to_s)
               tokens << ::Regexp.escape("}")
             end
           when s.scan(/\(|\)|\{|\}|\[|\]|\<|\>/)
             tokens << ::Regexp.escape(s.matched)
           when s.scan(/\s+/)
             tokens << '\s+'
-          when s.scan(/\w+|[\p{L}&&\p{^ASCII}]+/)
+          when s.scan(WORD_RE)
             tokens << ::Regexp.escape(s.matched)
           when s.scan(%r{[!"#%&'=\-^~¥\\|`@*:+;/?.,]+})
             tokens << ::Regexp.escape(s.matched.rstrip)
           when s.scan(/./)
             tokens << ::Regexp.escape(s.matched)
           end
         end
 
-        if tokens.first =~ /\A\p{L}/
+        if source[0] =~ /\p{L}/
           tokens.first.prepend('\b')
         end
 
-        if tokens.last =~ /\p{L}\Z/
+        if source[-1] =~ /\p{L}/
           tokens.last << '\b'
         end
 
         options = ::Regexp::MULTILINE
         options |= ::Regexp::IGNORECASE unless case_sensitive
 
-        ::Regexp.new(tokens.join('\s*').gsub(/\\s\*(\\s\+\\s\*)+/, '\s+'), options)
+        buf, skip = tokens[0].is_a?(String) ? [tokens[0], false] : ["", true]
+        tokens.drop(1).each do |tok|
+          if tok == :nobr
+            skip = true
+          else
+            buf << '\s*' unless skip
+            skip = false
+            buf << tok
+          end
+        end
+
+        ::Regexp.new(buf.
+          gsub(/\\s\*(\\s\+\\s\*)+/, '\s+').
+          gsub(/#{::Regexp.escape('\s+\s*')}/, '\s+').
+          gsub(/#{::Regexp.escape('\s*\s+')}/, '\s+'), options)
       end
     end
   end
 end