lib/goodcheck/pattern.rb in goodcheck-1.7.1 vs lib/goodcheck/pattern.rb in goodcheck-2.1.0
- old
+ new
@@ -1,74 +1,215 @@
module Goodcheck
- class Pattern
- attr_reader :source
- attr_reader :regexp
- attr_reader :globs
+ module Pattern
+ class Literal
+ attr_reader :source
+ attr_reader :case_sensitive
- def initialize(source:, regexp:, globs:)
- @source = source
- @regexp = regexp
- @globs = globs
- end
+ def initialize(source:, case_sensitive:)
+ @source = source
+ @case_sensitive = case_sensitive
+ end
- def self.literal(literal, case_sensitive:, globs: [])
- new(
- source: literal,
- regexp: Regexp.compile(Regexp.escape(literal), !case_sensitive),
- globs: globs
- )
+ def regexp
+ @regexp ||= ::Regexp.compile(::Regexp.escape(source), !case_sensitive)
+ end
end
- def self.regexp(regexp, case_sensitive:, multiline:, globs: [])
- options = 0
- options |= Regexp::IGNORECASE unless case_sensitive
- options |= Regexp::MULTILINE if multiline
+ class Regexp
+ attr_reader :source
+ attr_reader :case_sensitive
+ attr_reader :multiline
- new(
- source: regexp,
- regexp: Regexp.compile(regexp, options),
- globs: globs
- )
- end
+ def initialize(source:, case_sensitive:, multiline:, regexp: nil)
+ @source = source
+ @case_sensitive = case_sensitive
+ @multiline = multiline
+ @regexp = regexp
+ end
- def self.token(tokens, case_sensitive:, globs: [])
- new(
- source: tokens,
- regexp: compile_tokens(tokens, case_sensitive: case_sensitive),
- globs: globs
- )
+ def regexp
+ @regexp ||= begin
+ options = 0
+ options |= ::Regexp::IGNORECASE unless case_sensitive
+ options |= ::Regexp::MULTILINE if multiline
+ ::Regexp.compile(source, options)
+ end
+ end
end
- def self.compile_tokens(source, case_sensitive:)
- tokens = []
- s = StringScanner.new(source)
+ class Token
+ attr_reader :source, :case_sensitive, :variables
- until s.eos?
- case
- when s.scan(/\(|\)|\{|\}|\[|\]|\<|\>/)
- tokens << Regexp.escape(s.matched)
- when s.scan(/\s+/)
- tokens << '\s+'
- when s.scan(/\w+|[\p{Letter}&&\p{^ASCII}]+/)
- tokens << Regexp.escape(s.matched)
- when s.scan(%r{[!"#$%&'=\-^~¥\\|`@*:+;/?.,]+})
- tokens << Regexp.escape(s.matched.rstrip)
- when s.scan(/./)
- tokens << Regexp.escape(s.matched)
+ def initialize(source:, variables:, case_sensitive:)
+ @source = source
+ @variables = variables
+ @case_sensitive = case_sensitive
+ end
+
+ def regexp
+ @regexp ||= Token.compile_tokens(source, variables, case_sensitive: case_sensitive)
+ end
+
+ class VarPattern
+ attr_reader :negated
+ attr_reader :patterns
+ attr_accessor :type
+
+ def initialize(patterns:, negated:)
+ @patterns = patterns
+ @negated = negated
end
+
+ def cast(str)
+ case type
+ when :int
+ str.to_i
+ when :float, :number
+ str.to_f
+ else
+ str
+ end
+ end
+
+ def test(str)
+ return true if patterns.empty?
+
+ value = cast(str)
+
+ unless negated
+ patterns.any? {|pattern| pattern === value }
+ else
+ patterns.none? {|pattern| pattern === value }
+ end
+ end
+
+ def self.empty
+ VarPattern.new(patterns: [], negated: false)
+ end
end
- if tokens.first =~ /\A\p{Letter}/
- tokens.first.prepend('\b')
+ def test_variables(match)
+ variables.all? do |name, var|
+ str = match[name]
+ str && var.test(str)
+ end
end
- if tokens.last =~ /\p{Letter}\Z/
- tokens.last << '\b'
+ @@TYPES = {}
+
+ @@TYPES[:string] = -> (name) {
+ ::Regexp.union(
+ /"(?<#{name}>(?:[^"]|\")*)"/,
+ /'(?<#{name}>(?:[^']|\')*)'/
+ )
+ }
+
+ @@TYPES[:number] = -> (name) {
+ ::Regexp.union(
+ regexp_for_type(name: name, type: :int),
+ regexp_for_type(name: name, type: :float)
+ )
+ }
+
+ @@TYPES[:int] = -> (name) {
+ ::Regexp.union(
+ /(?<#{name}>[+-]?[1-9](:?\d|_\d)*)/,
+ /(?<#{name}>[+-]?0[dD][0-7]+)/,
+ /(?<#{name}>[+-]?0[oO]?[0-7]+)/,
+ /(?<#{name}>[+-]?0[xX][0-9a-fA-F]+)/,
+ /(?<#{name}>[+-]?0[bB][01]+)/
+ )
+ }
+
+ @@TYPES[:float] = -> (name) {
+ ::Regexp.union(
+ /(?<#{name}>[+-]?\d+\.\d*(:?e[+-]?\d+)?)/,
+ /(?<#{name}>[+-]?\d+(:?e[+-]?\d+)?)/
+ )
+ }
+
+ @@TYPES[:word] = -> (name) {
+ /(?<#{name}>\S+)/
+ }
+
+ @@TYPES[:identifier] = -> (name) {
+ /(?<#{name}>[a-zA-Z_]\w*)\b/
+ }
+
+ # From rails_autolink gem
+ # https://github.com/tenderlove/rails_autolink/blob/master/lib/rails_autolink/helpers.rb#L73
+ # With ')' support, which should be frequently used for markdown or CSS `url(...)`
+ AUTO_LINK_RE = %r{
+ (?: ((?:ed2k|ftp|http|https|irc|mailto|news|gopher|nntp|telnet|webcal|xmpp|callto|feed|svn|urn|aim|rsync|tag|ssh|sftp|rtsp|afs|file):)// | www\. )
+ [^\s<\u00A0")]+
+ }ix
+
+ # https://github.com/tenderlove/rails_autolink/blob/master/lib/rails_autolink/helpers.rb#L81-L82
+ AUTO_EMAIL_LOCAL_RE = /[\w.!#\$%&'*\/=?^`{|}~+-]/
+ AUTO_EMAIL_RE = /(?<!#{AUTO_EMAIL_LOCAL_RE})[\w.!#\$%+-]\.?#{AUTO_EMAIL_LOCAL_RE}*@[\w-]+(?:\.[\w-]+)+/
+
+ @@TYPES[:url] = -> (name) {
+ /\b(?<#{name}>#{AUTO_LINK_RE})/
+ }
+
+ @@TYPES[:email] = -> (name) {
+ /\b(?<#{name}>#{AUTO_EMAIL_RE})/
+ }
+
+ def self.regexp_for_type(name:, type:)
+ ty = type || :word
+ if @@TYPES.key?(ty)
+ @@TYPES[ty][name]
+ end
end
- options = Regexp::MULTILINE
- options |= Regexp::IGNORECASE unless case_sensitive
+ def self.compile_tokens(source, variables, case_sensitive:)
+ tokens = []
+ s = StringScanner.new(source)
- Regexp.new(tokens.join('\s*').gsub(/\\s\*(\\s\+\\s\*)+/, '\s+'), options)
+ until s.eos?
+ case
+ when s.scan(/\${(?<name>[a-zA-Z_]\w*)(?::(?<type>#{::Regexp.union(*@@TYPES.keys.map(&:to_s))}))?}/)
+ name = s[:name].to_sym
+ type = s[:type] && s[:type].to_sym
+
+ if variables.key?(name)
+ variables[name].type = type
+ regexp = regexp_for_type(name: name, type: type).to_s
+ if tokens.empty? && (type == :word || type == :identifier)
+ regexp = /\b#{regexp.to_s}/
+ end
+ tokens << regexp.to_s
+ else
+ tokens << ::Regexp.escape("${")
+ tokens << ::Regexp.escape(name.to_s)
+ tokens << ::Regexp.escape("}")
+ end
+ when s.scan(/\(|\)|\{|\}|\[|\]|\<|\>/)
+ tokens << ::Regexp.escape(s.matched)
+ when s.scan(/\s+/)
+ tokens << '\s+'
+ when s.scan(/\w+|[\p{L}&&\p{^ASCII}]+/)
+ tokens << ::Regexp.escape(s.matched)
+ when s.scan(%r{[!"#%&'=\-^~¥\\|`@*:+;/?.,]+})
+ tokens << ::Regexp.escape(s.matched.rstrip)
+ when s.scan(/./)
+ tokens << ::Regexp.escape(s.matched)
+ end
+ end
+
+ if tokens.first =~ /\A\p{L}/
+ tokens.first.prepend('\b')
+ end
+
+ if tokens.last =~ /\p{L}\Z/
+ tokens.last << '\b'
+ end
+
+ options = ::Regexp::MULTILINE
+ options |= ::Regexp::IGNORECASE unless case_sensitive
+
+ ::Regexp.new(tokens.join('\s*').gsub(/\\s\*(\\s\+\\s\*)+/, '\s+'), options)
+ end
end
end
end