require 'chef_fs'
require 'chef_fs/path_utils'
module ChefFS
#
# Represents a glob pattern. This class is designed so that it can
# match arbitrary strings, and tell you about partial matches.
#
# Examples:
# * a*z
# - Matches abcz
# - Does not match ab/cd/ez
# - Does not match xabcz
# * a**z
# - Matches abcz
# - Matches ab/cd/ez
#
# Special characters supported:
# * / (and \\ on Windows) - directory separators
# * \* - match zero or more characters (but not directory separators)
# * \*\* - match zero or more characters, including directory separators
# * ? - match exactly one character (not a directory separator)
# Only on Unix:
# * [abc0-9] - match one of the included characters
# * \\ - escape character: match the given character
#
class FilePattern
  # Initialize a new FilePattern with the pattern string.
  #
  # The pattern is stored as given and only parsed lazily, by the first
  # method that needs the parsed form. An unsupported pattern (a ".." that
  # directly follows a part containing "**") therefore raises
  # +ArgumentError+ from that first call, not from the constructor.
  #
  # ==== Attributes
  #
  # * +pattern+ - the glob pattern string
  def initialize(pattern)
    @pattern = pattern
  end

  # The pattern string.
  attr_reader :pattern

  # Reports whether this pattern could match children of path.
  # If the pattern doesn't match the path up to this point or
  # if it matches and doesn't allow further children, this will
  # return false.
  #
  # ==== Attributes
  #
  # * +path+ - a path to check
  #
  # ==== Examples
  #
  #   abc/def.could_match_children?('abc') == true
  #   abc.could_match_children?('abc') == false
  #   abc/def.could_match_children?('x') == false
  #   a**z.could_match_children?('ab/cd') == true
  def could_match_children?(path)
    return false if path == '' # Empty string is not a path

    argument_is_absolute = absolute_path?(path)
    return false if is_absolute != argument_is_absolute

    path = path[1, path.length - 1] if argument_is_absolute
    path_parts = ChefFS::PathUtils::split(path)

    # If the pattern is shorter than the path (or same size), children will
    # be larger than the pattern, and will not match.
    return false if regexp_parts.length <= path_parts.length && !has_double_star

    # If the path doesn't match up to this point, children won't match either.
    # Parts at or beyond the first ** have no per-part regexp (nil) and are
    # not checked here.
    mismatch = path_parts.zip(regexp_parts).any? do |part, part_regexp|
      !part_regexp.nil? && !part_regexp.match(part)
    end
    return false if mismatch

    # Otherwise, it's possible we could match: the path matches to this point,
    # and the pattern is longer than the path.
    # TODO There is one edge case where the double star comes after some
    # characters like abc**def--we could check whether the next bit of path
    # starts with abc in that case.
    true
  end

  # Returns the immediate child of a path that would be matched
  # if this FilePattern was applied. If more than one child
  # could match, this method returns nil.
  #
  # ==== Attributes
  #
  # * +path+ - The path to look for an exact child name under.
  #
  # ==== Returns
  #
  # The next directory in the pattern under the given path.
  # If the directory part could match more than one child, it
  # returns +nil+.
  #
  # ==== Examples
  #
  #   abc/def.exact_child_name_under('abc') == 'def'
  #   abc/def/ghi.exact_child_name_under('abc') == 'def'
  #   abc/*/ghi.exact_child_name_under('abc') == nil
  #   abc/*/ghi.exact_child_name_under('abc/def') == 'ghi'
  #   abc/**/ghi.exact_child_name_under('abc/def') == nil
  #
  # This method assumes +could_match_children?(path)+ is +true+.
  def exact_child_name_under(path)
    path = path[1, path.length - 1] if absolute_path?(path)
    dirs_in_path = ChefFS::PathUtils::split(path).length
    # Indexing past the end yields nil, which is also the answer when the
    # pattern has no (exact) part at that depth.
    exact_parts[dirs_in_path]
  end

  # If this pattern represents an exact path, returns the exact path.
  # Returns +nil+ when any part of the pattern contains a wildcard.
  #
  #   abc/def.exact_path == 'abc/def'
  #   abc/*def.exact_path == nil
  #   abc/x\\yz.exact_path == 'abc/xyz'
  def exact_path
    return nil if has_double_star || exact_parts.any? { |part| part.nil? }

    result = ChefFS::PathUtils::join(*exact_parts)
    is_absolute ? ChefFS::PathUtils::join('', result) : result
  end

  # Returns the normalized version of the pattern, with / as the directory
  # separator, and "." and ".." removed.
  #
  # Leading ".." parts of a relative pattern cannot be resolved and are kept.
  #
  # This does not presently change things like \b to b, but in the future
  # it might.
  def normalized_pattern
    calculate
    @normalized_pattern
  end

  # Tell whether this pattern matches absolute, or relative paths
  def is_absolute
    calculate
    @is_absolute
  end

  # Returns +true+ if this pattern matches the path, +false+ otherwise.
  # An absolute pattern never matches a relative path and vice versa.
  #
  #   abc/*/def.match?('abc/foo/def') == true
  #   abc/*/def.match?('abc/foo') == false
  def match?(path)
    argument_is_absolute = absolute_path?(path)
    return false if is_absolute != argument_is_absolute

    path = path[1, path.length - 1] if argument_is_absolute
    !!regexp.match(path)
  end

  # Returns the string pattern
  def to_s
    pattern
  end

  # Given a relative file pattern and a directory, makes a new file pattern
  # starting with the directory. A pattern that already starts with a
  # directory separator is used as-is.
  #
  #   FilePattern.relative_to('/usr/local', 'bin/*grok') == FilePattern.new('/usr/local/bin/*grok')
  #
  # BUG: this does not support patterns starting with ..
  def self.relative_to(dir, pattern)
    return FilePattern.new(pattern) if pattern =~ /\A#{ChefFS::PathUtils::regexp_path_separator}/

    FilePattern.new(ChefFS::PathUtils::join(dir, pattern))
  end

  private

  # Tells whether +path+ starts with a directory separator. Anchored with \A
  # (not ^) so a separator following an embedded newline does not count.
  def absolute_path?(path)
    !!(path =~ /\A#{ChefFS::PathUtils::regexp_path_separator}/)
  end

  # Regexp matching a whole path (leading separator already removed).
  def regexp
    calculate
    @regexp
  end

  # One anchored Regexp per pattern part, up to (not including) the first
  # part that contains **.
  def regexp_parts
    calculate
    @regexp_parts
  end

  # Parallel to regexp_parts: the literal name of each part, or nil when
  # the part contains a wildcard.
  def exact_parts
    calculate
    @exact_parts
  end

  # Whether any part of the pattern contains **.
  def has_double_star
    calculate
    @has_double_star
  end

  # Parses @pattern once and fills in @is_absolute, @regexp, @regexp_parts,
  # @exact_parts, @has_double_star and @normalized_pattern.
  #
  # Raises +ArgumentError+ when a ".." directly follows a part containing **.
  def calculate
    return if @regexp

    @is_absolute = absolute_path?(@pattern)
    full_regexp_parts = []
    normalized_parts = []
    @regexp_parts = []
    @exact_parts = []
    @has_double_star = false

    ChefFS::PathUtils::split(pattern).each do |part|
      part_regexp, exact, part_has_double_star = FilePattern::pattern_to_regexp(part)
      @has_double_star = true if part_has_double_star

      # Skip // and /./ (pretend it's not there)
      next if exact == '' || exact == '.'

      # Back up when you see .. (unless the prior part has ** in it, in
      # which case .. must be preserved)
      if exact == '..'
        # If we are at the root, just pretend the .. isn't there
        next if @is_absolute && normalized_parts.empty?

        unless normalized_parts.empty?
          _, exact_prev, has_double_star_prev = FilePattern.pattern_to_regexp(normalized_parts[-1])
          raise ArgumentError, ".. overlapping a ** is unsupported" if has_double_star_prev

          # A preceding ".." is itself unresolved and cannot be cancelled by
          # another "..": in that case fall through and keep both.
          unless exact_prev == '..'
            full_regexp_parts.pop
            normalized_parts.pop
            unless @has_double_star
              @regexp_parts.pop
              @exact_parts.pop
            end
            next
          end
        end
      end

      # Build up the regexp
      full_regexp_parts << part_regexp
      normalized_parts << part
      # Per-part data is only meaningful before the first **, since after
      # it a pattern part no longer lines up with one path part.
      unless @has_double_star
        @regexp_parts << Regexp.new("\\A#{part_regexp}\\z")
        @exact_parts << exact
      end
    end

    # \A and \z anchor to the whole string; ^ and $ would anchor to lines and
    # let a path containing a newline match on just one of its lines.
    @regexp = Regexp.new("\\A#{full_regexp_parts.join(ChefFS::PathUtils::regexp_path_separator)}\\z")
    @normalized_pattern = ChefFS::PathUtils.join(*normalized_parts)
    @normalized_pattern = ChefFS::PathUtils.join('', @normalized_pattern) if @is_absolute
  end

  # NOTE: +private+ above does not apply to the +def self.+ methods below;
  # they remain publicly callable class methods.

  # Regexp used to tokenize a single pattern part: it captures every glob
  # token or regexp-special character. Computed on first use and cached.
  def self.pattern_special_characters
    @pattern_special_characters ||=
      if ChefFS::windows?
        /(\*\*|\*|\?|[\*\?\.\|\(\)\[\]\{\}\+\\\\\^\$])/
      else
        # Unix also supports character regexes and backslashes
        /(\\.|\[[^\]]+\]|\*\*|\*|\?|[\*\?\.\|\(\)\[\]\{\}\+\\\\\^\$])/
      end
  end

  # Characters that keep their backslash when a backslash-escaped pattern
  # character is copied into a regexp.
  def self.regexp_escape_characters
    [ '[', '\\', '^', '$', '.', '|', '?', '*', '+', '(', ')', '{', '}' ]
  end

  # Translates one pattern part (the text between directory separators)
  # into regexp source.
  #
  # ==== Returns
  #
  # A three element array:
  # * the regexp source string (unanchored)
  # * the exact string the part matches, or +nil+ if it contains a wildcard
  # * +true+ if the part contains **
  def self.pattern_to_regexp(pattern)
    regexp = ''.dup
    exact = ''.dup
    has_double_star = false

    pattern.split(pattern_special_characters).each_with_index do |part, index|
      # Odd indexes from the split are symbols. Even are normal bits.
      if index.even?
        exact << part unless exact.nil?
        regexp << part
        next
      end

      case part
      # **, * and ? happen on both platforms.
      when '**'
        exact = nil
        has_double_star = true
        regexp << '.*'
      when '*'
        exact = nil
        regexp << '[^\/]*'
      when '?'
        exact = nil
        # Exactly one character that is not a directory separator, so that
        # "a?b" does not match "a/b" in the full-path regexp.
        regexp << '[^\/]'
      else
        if part[0, 1] == '\\' && part.length == 2
          # backslash escapes are only supported on Unix, and are handled
          # here by leaving the escape on (it means the same thing in a regex)
          exact << part[1, 1] unless exact.nil?
          regexp << (regexp_escape_characters.include?(part[1, 1]) ? part : part[1, 1])
        elsif part[0, 1] == '[' && part.length > 1
          # [...] happens only on Unix, and is handled here by *not*
          # backslashing (it means the same thing in and out of regex)
          exact = nil
          regexp << part
        else
          # Any other regexp-special character is a literal in the pattern.
          exact << part unless exact.nil?
          regexp << "\\#{part}"
        end
      end
    end

    [regexp, exact, has_double_star]
  end
end
end