module MCollective
  module Matcher
    class Scanner
      attr_accessor :arguments, :token_index

      def initialize(arguments)
        @token_index = 0
        @arguments = arguments.split("")
        @seperation_counter = 0
        @white_spaces = 0
      end

      # Scans the input string and identifies single language tokens
      def get_token
        if @token_index >= @arguments.size
          return nil
        end

        case @arguments[@token_index]
        when "("
          return "(", "("

        when ")"
          return ")", ")"

        when "n"
          if (@arguments[@token_index + 1] == "o") && (@arguments[@token_index + 2] == "t") && ((@arguments[@token_index + 3] == " ") || (@arguments[@token_index + 3] == "("))
            @token_index += 2
            return "not", "not"
          else
            gen_statement
          end

        when "!"
          return "not", "not"

        when "a"
          if (@arguments[@token_index + 1] == "n") && (@arguments[@token_index + 2] == "d") && ((@arguments[@token_index + 3] == " ") || (@arguments[@token_index + 3] == "("))
            @token_index += 2
            return "and", "and"
          else
            gen_statement
          end

        when "o"
          if (@arguments[@token_index + 1] == "r") && ((@arguments[@token_index + 2] == " ") || (@arguments[@token_index + 2] == "("))
            @token_index += 1
            return "or", "or"
          else
            gen_statement
          end

        when " "
          return " ", " "

        else
          gen_statement
        end
      end

      private
      # Helper generates a statement token
      def gen_statement
        func = false
        current_token_value = ""
        j = @token_index
        escaped = false

        begin
          if (@arguments[j] == "/")
            begin
              current_token_value << @arguments[j]
              j += 1
            end until (j >= @arguments.size) || (@arguments[j] =~ /\s/)
          elsif (@arguments[j] =~ /=|<|>/)
            while !(@arguments[j] =~ /=|<|>/)
              current_token_value << @arguments[j]
              j += 1
            end

            current_token_value << @arguments[j]
            j += 1

            if @arguments[j] == "/"
              begin
                current_token_value << @arguments[j]
                j += 1
                if @arguments[j] == "/"
                  current_token_value << "/"
                  break
                end
              end until (j >= @arguments.size) || (@arguments[j] =~ /\//)
              while (j < @arguments.size) && ((@arguments[j] != " ") && (@arguments[j] != ")"))
                current_token_value << @arguments[j]
                j += 1
              end
            end
          else
            begin
              # Identify and tokenize regular expressions by ignoring everything between /'s
              if @arguments[j] == '/'
                current_token_value << '/'
                j += 1
                while(j < @arguments.size && @arguments[j] != '/')
                  if  @arguments[j] == '\\'
                    # eat the escape char
                    current_token_value << @arguments[j]
                    j += 1
                    escaped = true
                  end

                  current_token_value << @arguments[j]
                  j += 1
                end
                current_token_value << @arguments[j] if @arguments[j]
                break
              end

              if @arguments[j] == "("
                func = true

                current_token_value << @arguments[j]
                j += 1

                while j < @arguments.size
                  current_token_value << @arguments[j]
                  if @arguments[j] == ')'
                    j += 1
                    break
                  end
                  j += 1
                end
              elsif @arguments[j] == '"' || @arguments[j] == "'"
                escaped = true
                escaped_with = @arguments[j]

                j += 1 # step over first " or '
                @white_spaces += 1
                # identified "..." or '...'
                while j < @arguments.size
                  if  @arguments[j] == '\\'
                    # eat the escape char but don't add it to the token, or we
                    # end up with \\\"
                    j += 1
                    @white_spaces += 1
                    unless j < @arguments.size
                      break
                    end
                  elsif @arguments[j] == escaped_with
                    j += 1
                    @white_spaces += 1
                    break
                  end
                  current_token_value << @arguments[j]
                  j += 1
                end
              else
                current_token_value << @arguments[j]
                j += 1
              end

              if(@arguments[j] == ' ')
                break if(is_klass?(j) && !(@arguments[j-1] =~ /=|<|>/))
              end

              if( (@arguments[j] == ' ') && (@seperation_counter < 2) && !(current_token_value.match(/^.+(=|<|>).+$/)) )
                if((index = lookahead(j)))
                  j = index
                end
              end
            end until (j >= @arguments.size) || (@arguments[j] =~ /\s|\)/)
            @seperation_counter = 0
          end
        rescue Exception => e
          raise "An exception was raised while trying to tokenize '#{current_token_value} - #{e}'"
        end

        @token_index += current_token_value.size + @white_spaces - 1
        @white_spaces = 0

        # bar(
        if current_token_value.match(/.+?\($/)
          return "bad_token", [@token_index - current_token_value.size + 1, @token_index]
        # /foo/=bar
        elsif current_token_value.match(/^\/.+?\/(<|>|=).+/)
          return "bad_token", [@token_index - current_token_value.size + 1, @token_index]
        elsif current_token_value.match(/^.+?\/(<|>|=).+/)
          return "bad_token", [@token_index - current_token_value.size + 1, @token_index]
        else
          if func
            if current_token_value.match(/^.+?\((\s*(')[^']*(')\s*(,\s*(')[^']*('))*)?\)(\.[a-zA-Z0-9_]+)?((!=|<=|>=|=|>|<).+)?$/) ||
               current_token_value.match(/^.+?\((\s*(")[^"]*(")\s*(,\s*(")[^"]*("))*)?\)(\.[a-zA-Z0-9_]+)?((!=|<=|>=|=|>|<).+)?$/)
              return "fstatement", current_token_value
            else
              return "bad_token", [@token_index - current_token_value.size + 1, @token_index]
            end
          else
            if escaped
              return "statement", current_token_value
            end
            slash_err = false
            current_token_value.split('').each do |c|
              if c == '/'
                slash_err = !slash_err
              end
            end
            return "bad_token", [@token_index - current_token_value.size + 1, @token_index] if slash_err
            return "statement", current_token_value
          end
        end
      end

      # Deal with special puppet class statement
      def is_klass?(j)
        while(j < @arguments.size && @arguments[j] == ' ')
          j += 1
        end

        if @arguments[j] =~ /=|<|>/
          return false
        else
          return true
        end
      end

      # Eat spaces while looking for the next comparison symbol
      def lookahead(index)
        index += 1
        while(index <= @arguments.size)
          @white_spaces += 1
          unless(@arguments[index] =~ /\s/)
            @seperation_counter +=1
            return index
          end
          index += 1
        end
        return nil
      end
    end
  end
end