lib/goodcheck/pattern.rb in goodcheck-2.3.2 vs lib/goodcheck/pattern.rb in goodcheck-2.4.0

- old
+ new

@@ -160,63 +160,121 @@ @@TYPES[:email] = -> (name) { /\b(?<#{name}>#{AUTO_EMAIL_RE})/ } - def self.regexp_for_type(name:, type:) - ty = type || :word - if @@TYPES.key?(ty) - @@TYPES[ty][name] + def self.expand(prefix, suffix, depth: 5) + if depth == 0 + [ + /[^#{suffix}]*/ + ] + else + expandeds = expand(prefix, suffix, depth: depth - 1) + [/[^#{prefix}#{suffix}]*#{prefix}#{expandeds.first}#{suffix}[^#{prefix}#{suffix}]*/] + expandeds end end + def self.regexp_for_type(name:, type:, scanner:) + prefix = scanner.pre_match[-1] + suffix = scanner.check(WORD_RE) || scanner.peek(1) + + case + when type == :__ + body = case + when prefix == "{" && suffix == "}" + ::Regexp.union(expand(prefix, suffix)) + when prefix == "(" && suffix == ")" + ::Regexp.union(expand(prefix, suffix)) + when prefix == "[" && suffix == "]" + ::Regexp.union(expand(prefix, suffix)) + when prefix == "<" && suffix == ">" + ::Regexp.union(expand(prefix, suffix)) + else + unless suffix.empty? + /(?~#{::Regexp.escape(suffix)})/ + else + /.*/ + end + end + /(?<#{name}>#{body})/ + + when @@TYPES.key?(type) + @@TYPES[type][name] + end + end + + WORD_RE = /\w+|[\p{L}&&\p{^ASCII}]+/ + def self.compile_tokens(source, variables, case_sensitive:) tokens = [] s = StringScanner.new(source) until s.eos? case when s.scan(/\${(?<name>[a-zA-Z_]\w*)(?::(?<type>#{::Regexp.union(*@@TYPES.keys.map(&:to_s))}))?}/) name = s[:name].to_sym - type = s[:type] && s[:type].to_sym + type = s[:type] ? s[:type].to_sym : :__ if variables.key?(name) + if !s[:type] && s.pre_match == "" + Goodcheck.logger.error "Variable binding ${#{name}} at the beginning of pattern would cause an unexpected match" + end + if !s[:type] && s.peek(1) == "" + Goodcheck.logger.error "Variable binding ${#{name}} at the end of pattern would cause an unexpected match" + end + + tokens << :nobr variables[name].type = type - regexp = regexp_for_type(name: name, type: type).to_s + regexp = regexp_for_type(name: name, type: type, scanner: s).to_s if tokens.empty? && (type == :word || type == :identifier) regexp = /\b#{regexp.to_s}/ end tokens << regexp.to_s + tokens << :nobr else tokens << ::Regexp.escape("${") tokens << ::Regexp.escape(name.to_s) tokens << ::Regexp.escape("}") end when s.scan(/\(|\)|\{|\}|\[|\]|\<|\>/) tokens << ::Regexp.escape(s.matched) when s.scan(/\s+/) tokens << '\s+' - when s.scan(/\w+|[\p{L}&&\p{^ASCII}]+/) + when s.scan(WORD_RE) tokens << ::Regexp.escape(s.matched) when s.scan(%r{[!"#%&'=\-^~¥\\|`@*:+;/?.,]+}) tokens << ::Regexp.escape(s.matched.rstrip) when s.scan(/./) tokens << ::Regexp.escape(s.matched) end end - if tokens.first =~ /\A\p{L}/ + if source[0] =~ /\p{L}/ tokens.first.prepend('\b') end - if tokens.last =~ /\p{L}\Z/ + if source[-1] =~ /\p{L}/ tokens.last << '\b' end options = ::Regexp::MULTILINE options |= ::Regexp::IGNORECASE unless case_sensitive - ::Regexp.new(tokens.join('\s*').gsub(/\\s\*(\\s\+\\s\*)+/, '\s+'), options) + buf, skip = tokens[0].is_a?(String) ? [tokens[0], false] : ["", true] + tokens.drop(1).each do |tok| + if tok == :nobr + skip = true + else + buf << '\s*' unless skip + skip = false + buf << tok + end + end + + ::Regexp.new(buf. + gsub(/\\s\*(\\s\+\\s\*)+/, '\s+'). + gsub(/#{::Regexp.escape('\s+\s*')}/, '\s+'). + gsub(/#{::Regexp.escape('\s*\s+')}/, '\s+'), options) end end end end