# encoding: utf-8

require 'pathspec/regexspec'

# Translates a single gitignore-style pattern line into an anchored regular
# expression string and hands that string to the RegexSpec initializer.
#
# Comment lines, blank lines and patterns containing three or more
# consecutive asterisks are "null operations": for those, @regex and
# @inclusive stay nil and the superclass initializer is never invoked.
class GitIgnoreSpec < RegexSpec
  attr_reader :regex

  # Parses +pattern+ and, when it is a usable pattern, builds the regular
  # expression source and passes it to the superclass initializer
  # (presumably RegexSpec compiles/stores it -- confirm in RegexSpec).
  #
  # pattern - the raw pattern line (String). nil is treated like a blank
  #           line. Leading and trailing whitespace is stripped.
  def initialize(pattern)
    @regex = nil
    @inclusive = nil

    pattern = pattern.nil? ? '' : pattern.strip

    # A blank pattern is a null-operation (neither includes nor excludes
    # files).
    return if pattern.empty?

    # A pattern starting with a hash ('#') serves as a comment (neither
    # includes nor excludes files). Escape the hash with a back-slash to
    # match a literal hash (i.e., '\#').
    return if pattern.start_with?('#')

    # Patterns containing three or more consecutive stars are invalid and
    # will be ignored.
    return if pattern.include?('***')

    # A pattern starting with an exclamation mark ('!') negates the pattern
    # (exclude instead of include). Escape the exclamation mark with a
    # back-slash to match a literal exclamation mark (i.e., '\!').
    negated = pattern.start_with?('!')
    pattern = pattern[1..-1] if negated

    # A lone '!' leaves nothing to match; treat it as a null-operation
    # instead of failing on an empty segment list below.
    return if pattern.empty?

    @inclusive = !negated

    # Remove the leading back-slash only when it escapes a hash ('#') or an
    # exclamation mark ('!'). Any other leading escape (e.g. '\*') is left in
    # place so translate_segment_glob treats the next character literally.
    pattern = pattern[1..-1] if pattern.start_with?('\\#', '\\!')

    # Split pattern into segments. -1 keeps the empty segment produced by a
    # trailing slash.
    pattern_segs = pattern.split('/', -1)

    # Normalize pattern to make processing easier.
    if pattern_segs.first.empty?
      # A pattern beginning with a slash ('/') will only match paths directly
      # on the root directory instead of any descendant paths. So, remove the
      # empty first segment to make the pattern relative to root.
      pattern_segs.shift
    elsif pattern_segs.first != '**' &&
          (pattern_segs.length == 1 ||
           (pattern_segs.length == 2 && pattern_segs.last.empty?))
      # A single-segment pattern without a beginning slash ('/') will match
      # any descendant path, and so will the same pattern with only a
      # trailing slash (e.g. 'dir/'). This is equivalent to "**/{pattern}".
      # So, prepend double-asterisks to make the pattern relative to root.
      pattern_segs.unshift('**')
    end

    # A pattern ending with a slash ('/') will match all descendant paths if
    # it is a directory but not if it is a regular file. This is equivalent
    # to "{pattern}/**". So, set the last segment to double asterisks to
    # include all descendants.
    pattern_segs[-1] = '**' if pattern_segs.last.empty?

    # Handle platforms with backslash separated paths. The doubled
    # back-slash is the regex-escaped form of a single back-slash.
    path_sep = File::SEPARATOR == '\\' ? '\\\\' : '/'

    # Build regular expression from pattern.
    regex = '^'
    # Whether a separator must be emitted before the next non-'**' segment.
    need_slash = false
    last_index = pattern_segs.size - 1

    pattern_segs.each_with_index do |seg, i|
      if seg == '**'
        if i == 0 && i == last_index
          # A pattern consisting solely of double-asterisks ('**') will
          # match every path.
          regex << '.+'
        elsif i == 0
          # A normalized pattern beginning with double-asterisks ('**')
          # will match any leading path segments.
          regex << "(?:.+#{path_sep})?"
          need_slash = false
        elsif i == last_index
          # A normalized pattern ending with double-asterisks ('**') will
          # match any trailing path segments.
          regex << "#{path_sep}.*"
        else
          # A pattern with inner double-asterisks ('**') will match
          # multiple (or zero) inner path segments.
          regex << "(?:#{path_sep}.+)?"
          need_slash = true
        end
      else
        regex << path_sep if need_slash

        regex << if seg == '*'
                   # Match single path segment.
                   "[^#{path_sep}]+"
                 else
                   # Match segment glob pattern.
                   translate_segment_glob(seg)
                 end
        need_slash = true
      end
    end

    regex << '$'
    super(regex)
  end

  # Passes +path+ straight through to the superclass implementation of
  # #match and returns whatever that returns.
  def match(path)
    super(path)
  end

  # Translates the glob pattern of a single path segment to regular
  # expression source. This is used by the constructor.
  #
  # pattern - the glob pattern for one segment (String).
  #
  # Returns the regular expression source (String).
  #
  # NOTE: This is derived from `fnmatch.translate()` and is similar to the
  # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
  # NOTE(review): the wildcards below always exclude '/', regardless of the
  # separator chosen in the constructor -- confirm this is intended.
  def translate_segment_glob(pattern)
    escape = false
    regex = ''
    i = 0

    while i < pattern.size
      # Get next character.
      char = pattern[i]
      i += 1

      if escape
        # Previous character was a back-slash: emit this one literally.
        escape = false
        regex << Regexp.escape(char)

      elsif char == '\\'
        # Escape character, escape next character.
        escape = true

      elsif char == '*'
        # Multi-character wildcard. Match any string (except slashes),
        # including an empty string.
        regex << '[^/]*'

      elsif char == '?'
        # Single-character wildcard. Match any single character (except a
        # slash).
        regex << '[^/]'

      elsif char == '['
        # Bracket expression wildcard. Except for the beginning exclamation
        # mark, the whole bracket expression can be used directly as regex
        # but we have to find where the expression ends.
        # - "[][!]" matches ']', '[' and '!'.
        # - "[]-]" matches ']' and '-'.
        # - "[!]a-]" matches any character except ']', 'a' and '-'.
        j = i

        # Pass bracket expression negation.
        j += 1 if j < pattern.size && pattern[j] == '!'

        # Pass first closing bracket if it is at the beginning of the
        # expression.
        j += 1 if j < pattern.size && pattern[j] == ']'

        # Find closing bracket (nil when there is none).
        closing = pattern.index(']', j)

        if closing
          expr = '['

          if pattern[i] == '!'
            # Bracket expression needs to be negated.
            expr << '^'
            i += 1
          elsif pattern[i] == '^'
            # POSIX declares that the regex bracket expression negation
            # "[^...]" is undefined in a glob pattern. Python's
            # `fnmatch.translate()` escapes the caret ('^') as a literal.
            # To maintain consistency with undefined behavior, the '^' is
            # escaped here as well.
            expr << '\\^'
            i += 1
          end

          # Escape a leading closing bracket that is part of the set rather
          # than its terminator.
          if pattern[i] == ']' && i != closing
            expr << '\]'
            i += 1
          end

          # Build regex bracket expression. Double every back-slash so it is
          # treated as a literal back-slash by regex as defined by POSIX.
          # The block form is required: in a plain replacement string,
          # '\\\\' is itself read as an escaped single back-slash, which
          # would make the substitution a no-op.
          expr << pattern[i..closing].gsub('\\') { '\\\\' }

          # Add regex bracket expression to regex result.
          regex << expr

          # Continue scanning one past the closing bracket:
          #
          #  [...]
          #   ^   ^
          #   i   closing
          #
          i = closing + 1
        else
          # Failed to find closing bracket, treat opening bracket as a
          # bracket literal instead of as an expression.
          regex << '\['
        end

      else
        # Regular character, escape it for regex.
        regex << Regexp.escape(char)
      end
    end

    regex
  end

  # Returns true for a normal (include) pattern, false for a pattern negated
  # with a leading '!', and nil for a null-operation pattern.
  def inclusive?
    @inclusive
  end
end