// Copyright (C) 2005 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #include <string> #include <sstream> #include <dlib/tokenizer.h> #include "tester.h" namespace { using namespace test; using namespace std; using namespace dlib; logger dlog("test.tokenizer"); template < typename tok > void tokenizer_kernel_test ( ) /*! requires - tok is an implementation of tokenizer_kernel_abstract.h ensures - runs tests on tok for compliance with the specs !*/ { print_spinner(); tok test; DLIB_TEST(test.numbers() == "0123456789"); DLIB_TEST(test.uppercase_letters() == "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); DLIB_TEST(test.lowercase_letters() == "abcdefghijklmnopqrstuvwxyz"); DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() + test.uppercase_letters() + test.numbers(),""); DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() + test.uppercase_letters(),""); DLIB_TEST(test.stream_is_set() == false); test.clear(); DLIB_TEST(test.stream_is_set() == false); DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() + test.uppercase_letters() + test.numbers(),""); DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() + test.uppercase_letters(),""); tok test2; ostringstream sout; istringstream sin; test2.set_stream(sin); DLIB_TEST(test2.stream_is_set()); DLIB_TEST(&test2.get_stream() == &sin); int type; string token; test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); sin.clear(); sin.str(" The cat 123asdf1234 ._ \n test."); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); DLIB_TEST(test2.peek_type() == tok::IDENTIFIER); DLIB_TEST(test2.peek_token() == "The"); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "The"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "cat"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::NUMBER); DLIB_TEST_MSG(token == "123","token: " << token); DLIB_TEST(test2.peek_type() == tok::IDENTIFIER); DLIB_TEST(test2.peek_token() == "asdf1234"); DLIB_TEST(test2.peek_type() == tok::IDENTIFIER); DLIB_TEST(test2.peek_token() == "asdf1234"); DLIB_TEST(test2.peek_type() == tok::IDENTIFIER); DLIB_TEST(test2.peek_token() == "asdf1234"); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "asdf1234"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "_"); DLIB_TEST(test2.peek_type() == tok::WHITE_SPACE); DLIB_TEST_MSG(test2.peek_token() == " ","token: \"" << token << "\"" << "\ntoken size: " << (unsigned int)token.size()); swap(test,test2); DLIB_TEST(test2.stream_is_set() == false); DLIB_TEST(test.peek_type() == tok::WHITE_SPACE); DLIB_TEST_MSG(test.peek_token() == " ","token: \"" << token << "\"" << "\ntoken size: " << (unsigned int)token.size()); test.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" << "\ntoken size: " << (unsigned int)token.size()); test.get_token(type,token); DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token); DLIB_TEST_MSG(token == "\n","token: " << token); swap(test,test2); DLIB_TEST(test.stream_is_set() == false); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST_MSG(token == "test","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); test2.set_identifier_token("_" + test.uppercase_letters() + test.lowercase_letters(),test.numbers() + "_" + test.uppercase_letters() +test.lowercase_letters()); sin.clear(); sin.str(" The cat 123asdf1234 ._ \n\r test."); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "The"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "cat"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::NUMBER); DLIB_TEST_MSG(token == "123","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "asdf1234"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "_"); swap(test,test2); DLIB_TEST(test2.stream_is_set() == false); test.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" << "\ntoken size: " << (unsigned int)token.size()); test.get_token(type,token); DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token); DLIB_TEST_MSG(token == "\n","token: " << token); swap(test,test2); DLIB_TEST(test.stream_is_set() == false); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == "\r ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST_MSG(token == "test","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); test2.set_identifier_token(test.uppercase_letters() + test.lowercase_letters(),test.numbers() + test.uppercase_letters() +test.lowercase_letters()); sin.clear(); sin.str(" The cat 123as_df1234 ._ \n test."); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "The"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "cat"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST(token == " "); test2.get_token(type,token); DLIB_TEST(type == tok::NUMBER); DLIB_TEST_MSG(token == "123","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "as"); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == "_","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST(token == "df1234"); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST(token == "_"); swap(test,test2); DLIB_TEST(test2.stream_is_set() == false); test.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" << "\ntoken size: " << (unsigned int)token.size()); test.get_token(type,token); DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token); DLIB_TEST_MSG(token == "\n","token: " << token); swap(test,test2); DLIB_TEST(test.stream_is_set() == false); test2.get_token(type,token); DLIB_TEST(type == tok::WHITE_SPACE); DLIB_TEST_MSG(token == " ","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::IDENTIFIER); DLIB_TEST_MSG(token == "test","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::CHAR); DLIB_TEST_MSG(token == ".","token: " << token); test2.get_token(type,token); DLIB_TEST(type == tok::END_OF_FILE); } class tokenizer_tester : public tester { public: tokenizer_tester ( ) : tester ("test_tokenizer", "Runs tests on the tokenizer component.") {} void perform_test ( ) { dlog << LINFO << "testing kernel_1a"; tokenizer_kernel_test<tokenizer::kernel_1a> (); dlog << LINFO << "testing kernel_1a_c"; tokenizer_kernel_test<tokenizer::kernel_1a_c>(); } } a; }