// Copyright (C) 2005  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.


#include <string>
#include <sstream>

#include <dlib/tokenizer.h>
#include "tester.h"

namespace  
{
    using namespace test;
    using namespace std;
    using namespace dlib;
  
    // Logger used by the tests in this file; it is created with the name "test.tokenizer".
    logger dlog("test.tokenizer");

    template <
        typename tok
        >
    void tokenizer_kernel_test (
    )
    /*!
        requires
            - tok is an implementation of tokenizer_kernel_abstract.h
        ensures
            - runs tests on tok for compliance with the specs 
    !*/
    {        

        print_spinner();

        tok test;

        DLIB_TEST(test.numbers() == "0123456789");
        DLIB_TEST(test.uppercase_letters() == "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        DLIB_TEST(test.lowercase_letters() == "abcdefghijklmnopqrstuvwxyz");

        DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() +
                     test.uppercase_letters() + test.numbers(),"");
        DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() +
                     test.uppercase_letters(),"");

        DLIB_TEST(test.stream_is_set() == false);
        test.clear();
        DLIB_TEST(test.stream_is_set() == false);

        DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() +
                     test.uppercase_letters() + test.numbers(),"");
        DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() +
                     test.uppercase_letters(),"");

        tok test2;

        ostringstream sout;
        istringstream sin;
        test2.set_stream(sin);

        DLIB_TEST(test2.stream_is_set());
        DLIB_TEST(&test2.get_stream() == &sin);

        int type;
        string token;

        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);
        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);
        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);            


        sin.clear();
        sin.str("  The cat 123asdf1234 ._ \n test.");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == "  ");

        DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
        DLIB_TEST(test2.peek_token() == "The"); 
        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "The");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "cat");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::NUMBER);
        DLIB_TEST_MSG(token == "123","token: " << token);

        DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
        DLIB_TEST(test2.peek_token() == "asdf1234");
        DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
        DLIB_TEST(test2.peek_token() == "asdf1234");
        DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
        DLIB_TEST(test2.peek_token() == "asdf1234");
        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "asdf1234");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "_");

        DLIB_TEST(test2.peek_type() == tok::WHITE_SPACE);
        DLIB_TEST_MSG(test2.peek_token() == " ","token: \"" << token << "\"" <<
                     "\ntoken size: " << (unsigned int)token.size());

        swap(test,test2);

        DLIB_TEST(test2.stream_is_set() == false);

        DLIB_TEST(test.peek_type() == tok::WHITE_SPACE);
        DLIB_TEST_MSG(test.peek_token() == " ","token: \"" << token << "\"" <<
                     "\ntoken size: " << (unsigned int)token.size());
        test.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
                     "\ntoken size: " << (unsigned int)token.size());

        test.get_token(type,token);
        DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
        DLIB_TEST_MSG(token == "\n","token: " << token);

        swap(test,test2);
        DLIB_TEST(test.stream_is_set() == false);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST_MSG(token == "test","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);










        test2.set_identifier_token("_" + test.uppercase_letters() +
                                   test.lowercase_letters(),test.numbers() + "_" + test.uppercase_letters()
                                   +test.lowercase_letters());


        sin.clear();
        sin.str("  The cat 123asdf1234 ._ \n\r test.");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == "  ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "The");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "cat");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::NUMBER);
        DLIB_TEST_MSG(token == "123","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "asdf1234");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "_");

        swap(test,test2);

        DLIB_TEST(test2.stream_is_set() == false);

        test.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
                     "\ntoken size: " << (unsigned int)token.size());

        test.get_token(type,token);
        DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
        DLIB_TEST_MSG(token == "\n","token: " << token);

        swap(test,test2);
        DLIB_TEST(test.stream_is_set() == false);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == "\r ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST_MSG(token == "test","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);













        test2.set_identifier_token(test.uppercase_letters() +
                                   test.lowercase_letters(),test.numbers() + test.uppercase_letters()
                                   +test.lowercase_letters());


        sin.clear();
        sin.str("  The cat 123as_df1234 ._ \n test.");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == "  ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "The");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "cat");            

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST(token == " ");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::NUMBER);
        DLIB_TEST_MSG(token == "123","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "as");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == "_","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST(token == "df1234");

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST(token == "_");

        swap(test,test2);

        DLIB_TEST(test2.stream_is_set() == false);

        test.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
                     "\ntoken size: " << (unsigned int)token.size());

        test.get_token(type,token);
        DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
        DLIB_TEST_MSG(token == "\n","token: " << token);

        swap(test,test2);
        DLIB_TEST(test.stream_is_set() == false);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::WHITE_SPACE);
        DLIB_TEST_MSG(token == " ","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::IDENTIFIER);
        DLIB_TEST_MSG(token == "test","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::CHAR);
        DLIB_TEST_MSG(token == ".","token: " << token);

        test2.get_token(type,token);
        DLIB_TEST(type == tok::END_OF_FILE);


    }





    /*!
        Test object for the tokenizer component.  It hands the name
        "test_tokenizer" and a one-line description to the tester base class, and
        its perform_test() runs tokenizer_kernel_test against the kernel_1a and
        kernel_1a_c tokenizer implementations, logging which one is under test.
    !*/
    class tokenizer_tester : public tester
    {
    public:
        tokenizer_tester () : tester("test_tokenizer", "Runs tests on the tokenizer component.") {}

        void perform_test ()
        {
            dlog << LINFO << "testing kernel_1a";
            tokenizer_kernel_test<tokenizer::kernel_1a>();

            dlog << LINFO << "testing kernel_1a_c";
            tokenizer_kernel_test<tokenizer::kernel_1a_c>();
        }
    };

    // Single file-local instance of the test object.  NOTE(review): presumably
    // constructing it is what makes the test known to the test driver -- confirm
    // against tester.h.
    tokenizer_tester a;

}