This repository has been archived on 2023-07-19. You can view files and clone it, but cannot push or open issues or pull requests.
plasp/lib/tokenize/tests/TestTokenizer.cpp

333 lines
9.2 KiB
C++
Raw Normal View History

2016-10-08 16:03:14 +02:00
#include <catch.hpp>
2017-05-12 14:17:57 +02:00
#include <tokenize/Tokenizer.h>
#include <tokenize/TokenizerException.h>
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// Reads a whitespace-separated input token by token with get<T>() and checks each
// extracted value; the final token "-400" is requested as size_t and the test
// requires that this raises a TokenizerException.
TEST_CASE("[tokenizer] Simple strings are tokenized correctly", "[tokenizer]")
{
    std::stringstream s(" identifier 5 \n-51\t 0 1 100 200 -300 -400");
    tokenize::Tokenizer<> p("input", s);

    REQUIRE(p.get<std::string>() == "identifier");
    REQUIRE(p.get<size_t>() == 5u);
    REQUIRE(p.get<int>() == -51);
    REQUIRE(p.get<bool>() == false);
    REQUIRE(p.get<bool>() == true);

    REQUIRE(p.get<int>() == 100);
    REQUIRE(p.get<size_t>() == 200u);
    REQUIRE(p.get<int>() == -300);
    // Only "-400" is left in the input at this point
    REQUIRE_THROWS_AS(p.get<size_t>(), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// First consumes the whole input with matching expect<T>() calls (which must not throw),
// then seeks back to individual tokens and requires that expecting a different value
// raises a TokenizerException.
TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenizer]")
{
    // The whitespace runs are significant: the seek() offsets below index into this exact
    // string. Token offsets: "identifier" 2, "5" 14, "-51" 19, "0" 24, "1" 26,
    // "100" 28, "200" 32, "-300" 36.
    std::stringstream s("  identifier  5   \n-51\t 0 1 100 200 -300 -400");
    tokenize::Tokenizer<> p("input", s);

    REQUIRE_NOTHROW(p.expect<std::string>("identifier"));
    REQUIRE_NOTHROW(p.expect<size_t>(5u));
    REQUIRE_NOTHROW(p.expect<int>(-51));
    REQUIRE_NOTHROW(p.expect<bool>(false));
    REQUIRE_NOTHROW(p.expect<bool>(true));

    REQUIRE_NOTHROW(p.expect<int>(100));
    REQUIRE_NOTHROW(p.expect<size_t>(200u));
    REQUIRE_NOTHROW(p.expect<int>(-300));
    // The remaining token is "-400", which is requested as size_t here
    REQUIRE_THROWS_AS(p.expect<size_t>(-400), tokenize::TokenizerException);

    // Start of input: next token is "identifier"
    p.seek(0);
    REQUIRE_THROWS_AS(p.expect<std::string>("error"), tokenize::TokenizerException);

    // "5"
    p.seek(14);
    REQUIRE_THROWS_AS(p.expect<size_t>(6u), tokenize::TokenizerException);

    // Whitespace preceding "-51"
    p.seek(17);
    REQUIRE_THROWS_AS(p.expect<int>(-50), tokenize::TokenizerException);

    // "0"
    p.seek(24);
    REQUIRE_THROWS_AS(p.expect<bool>(true), tokenize::TokenizerException);

    // "1"
    p.seek(26);
    REQUIRE_THROWS_AS(p.expect<bool>(false), tokenize::TokenizerException);

    // "100"
    p.seek(28);
    REQUIRE_THROWS_AS(p.expect<int>(101), tokenize::TokenizerException);

    // Whitespace preceding "200"
    p.seek(31);
    REQUIRE_THROWS_AS(p.expect<size_t>(201u), tokenize::TokenizerException);

    // "-300" (offset 34 would be the last digit of "200", not this token)
    p.seek(36);
    REQUIRE_THROWS_AS(p.expect<int>(-299), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// For each token, checks that testAndReturn<T>() never moves the cursor, that a failing
// testAndSkip<T>() leaves it in place, and that a successful testAndSkip<T>() leaves it
// at the asserted absolute stream offset.
TEST_CASE("[tokenizer] While tokenizing, the cursor position is as expected", "[tokenizer]")
{
    // The whitespace runs are significant: the absolute positions asserted below
    // (12, 15, 22, 25, 27) are the offsets just past each token in this exact string
    std::stringstream s("  identifier  5   \n-51\t 0 1");
    tokenize::Tokenizer<> p("input", s);

    // Token "identifier" (offsets 2-11)
    tokenize::Tokenizer<>::Position pos = p.position();
    REQUIRE(p.testAndReturn<std::string>("error") == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndReturn<std::string>("identifier") == true);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<std::string>("error") == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<std::string>("identifier") == true);
    REQUIRE(p.position() == 12);

    // Token "5" (offset 14)
    pos = p.position();
    REQUIRE(p.testAndReturn<size_t>(6u) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndReturn<size_t>(5u) == true);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<size_t>(6u) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<size_t>(5u) == true);
    REQUIRE(p.position() == 15);

    // Token "-51" (offsets 19-21)
    pos = p.position();
    REQUIRE(p.testAndReturn<int>(-50) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndReturn<int>(-51) == true);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<int>(-50) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<int>(-51) == true);
    REQUIRE(p.position() == 22);

    // Token "0" (offset 24), read as bool
    pos = p.position();
    REQUIRE(p.testAndReturn<bool>(true) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndReturn<bool>(false) == true);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<bool>(true) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<bool>(false) == true);
    REQUIRE(p.position() == 25);

    // Token "1" (offset 26), read as bool
    pos = p.position();
    REQUIRE(p.testAndReturn<bool>(false) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndReturn<bool>(true) == true);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<bool>(false) == false);
    REQUIRE(p.position() == pos);
    REQUIRE(p.testAndSkip<bool>(true) == true);
    REQUIRE(p.position() == 27);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// For string, int and bool tokens (single-token and multi-token inputs), consumes every
// token and then requires that one further get<T>() raises a TokenizerException.
TEST_CASE("[tokenizer] The end of the input stream is correctly handled", "[tokenizer]")
{
    // Single string token
    std::stringstream s1("test");
    tokenize::Tokenizer<> p1("input", s1);

    REQUIRE_NOTHROW(p1.expect<std::string>("test"));
    REQUIRE_THROWS_AS(p1.get<std::string>(), tokenize::TokenizerException);

    // Several string tokens
    std::stringstream s2("test1 test2 test3");
    tokenize::Tokenizer<> p2("input", s2);

    REQUIRE_NOTHROW(p2.expect<std::string>("test1"));
    REQUIRE_NOTHROW(p2.expect<std::string>("test2"));
    REQUIRE_NOTHROW(p2.expect<std::string>("test3"));
    REQUIRE_THROWS_AS(p2.get<std::string>(), tokenize::TokenizerException);

    // Single integer token
    std::stringstream s3("-127");
    tokenize::Tokenizer<> p3("input", s3);

    // Wrapped like the other setup reads so a failure is reported at this line
    REQUIRE_NOTHROW(p3.expect<int>(-127));
    REQUIRE_THROWS_AS(p3.get<int>(), tokenize::TokenizerException);

    // Several integer tokens
    std::stringstream s4("128 -1023 -4095");
    tokenize::Tokenizer<> p4("input", s4);

    REQUIRE_NOTHROW(p4.expect<size_t>(128));
    REQUIRE_NOTHROW(p4.expect<int>(-1023));
    REQUIRE_NOTHROW(p4.expect<int>(-4095));
    REQUIRE_THROWS_AS(p4.get<int>(), tokenize::TokenizerException);

    // Single boolean token
    std::stringstream s5("0");
    tokenize::Tokenizer<> p5("input", s5);

    // Wrapped like the other setup reads so a failure is reported at this line
    REQUIRE_NOTHROW(p5.expect<bool>(false));
    REQUIRE_THROWS_AS(p5.get<bool>(), tokenize::TokenizerException);

    // Several boolean tokens
    std::stringstream s6("0 1 0");
    tokenize::Tokenizer<> p6("input", s6);

    REQUIRE_NOTHROW(p6.expect<bool>(false));
    REQUIRE_NOTHROW(p6.expect<bool>(true));
    REQUIRE_NOTHROW(p6.expect<bool>(false));
    REQUIRE_THROWS_AS(p6.get<bool>(), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// Steps through a multi-line input with advance(), expect<T>(), skipLine() and
// skipWhiteSpace(), checking the reported row/column after each step, then checks that
// reset() and seek() restore the asserted stream positions.
TEST_CASE("[tokenizer] While tokenizing, the cursor location is as expected", "[tokenizer]")
{
    std::stringstream s("123 \n4\ntest1\n test2\ntest3 \ntest4\n\n\n\n");
    tokenize::Tokenizer<> p("input", s);

    const auto startPosition = p.position();

    // Character-wise traversal of the first line ("123 \n")
    tokenize::Location l = p.location();
    REQUIRE(l.rowStart == 1u);
    REQUIRE(l.columnStart == 1u);
    REQUIRE(p.currentCharacter() == '1');

    REQUIRE_NOTHROW(p.advance());

    l = p.location();
    REQUIRE(l.rowStart == 1u);
    REQUIRE(l.columnStart == 2u);
    REQUIRE(p.currentCharacter() == '2');

    REQUIRE_NOTHROW(p.advance());

    l = p.location();
    REQUIRE(l.rowStart == 1u);
    REQUIRE(l.columnStart == 3u);
    REQUIRE(p.currentCharacter() == '3');

    REQUIRE_NOTHROW(p.advance());

    l = p.location();
    REQUIRE(l.rowStart == 1u);
    REQUIRE(l.columnStart == 4u);
    REQUIRE(p.currentCharacter() == ' ');

    REQUIRE_NOTHROW(p.advance());

    l = p.location();
    REQUIRE(l.rowStart == 1u);
    REQUIRE(l.columnStart == 5u);
    REQUIRE(p.currentCharacter() == '\n');

    REQUIRE_NOTHROW(p.advance());

    // Second line ("4\n")
    l = p.location();
    REQUIRE(l.rowStart == 2u);
    REQUIRE(l.columnStart == 1u);
    REQUIRE(p.currentCharacter() == '4');

    REQUIRE_NOTHROW(p.advance());

    // Token-wise traversal: the location is checked right after each matched token
    REQUIRE_NOTHROW(p.expect<std::string>("test1"));

    l = p.location();
    REQUIRE(l.rowStart == 3u);
    REQUIRE(l.columnStart == 6u);

    REQUIRE_NOTHROW(p.expect<std::string>("test2"));

    l = p.location();
    REQUIRE(l.rowStart == 4u);
    REQUIRE(l.columnStart == 7u);

    REQUIRE_NOTHROW(p.expect<std::string>("test3"));

    l = p.location();
    REQUIRE(l.rowStart == 5u);
    REQUIRE(l.columnStart == 6u);

    // Line-wise traversal
    REQUIRE_NOTHROW(p.skipLine());

    l = p.location();
    REQUIRE(l.rowStart == 6u);
    REQUIRE(l.columnStart == 1u);

    REQUIRE_NOTHROW(p.skipLine());

    l = p.location();
    REQUIRE(l.rowStart == 7u);
    REQUIRE(l.columnStart == 1u);

    // Only newline characters remain in the input
    REQUIRE_NOTHROW(p.skipWhiteSpace());

    l = p.location();
    REQUIRE(l.rowStart == 10u);
    REQUIRE(l.columnStart == 1u);
    REQUIRE(p.atEnd());

    // Rewinding and absolute positioning
    p.reset();
    REQUIRE(p.position() == startPosition);
    REQUIRE_FALSE(p.atEnd());

    for (size_t i = 0; i < 5; ++i)
        p.advance();

    REQUIRE(p.position() == static_cast<std::istream::pos_type>(5));

    // Offset 7 is the first character of "test1" in the input string
    p.seek(static_cast<std::istream::pos_type>(7));

    REQUIRE(p.position() == static_cast<std::istream::pos_type>(7));

    REQUIRE_NOTHROW(p.expect<std::string>("test1"));

    // TODO: test tokenizer with multiple sections
}
////////////////////////////////////////////////////////////////////////////////////////////////////
2017-05-12 14:17:57 +02:00
// Calls removeComments() with line-style (";" ... "\n") and block-style ("/*" ... "*/")
// delimiters and checks that only the non-comment tokens remain readable, that the
// reported locations after the line-comment tokens are as asserted, and that the
// tokenizer reaches the end of input after the last token.
TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
{
    // Line comments at the beginning, in between, and at the end of the input
    std::stringstream s1("; comment at beginning\ntest1; comment in between\ntest2; comment at end");
    tokenize::Tokenizer<> p1("input", s1);

    p1.removeComments(";", "\n", false);

    REQUIRE_NOTHROW(p1.expect<std::string>("test1"));

    tokenize::Location l = p1.location();
    REQUIRE(l.rowStart == 2u);
    REQUIRE(l.columnStart == 6u);

    REQUIRE_NOTHROW(p1.expect<std::string>("test2"));

    l = p1.location();
    REQUIRE(l.rowStart == 3u);
    REQUIRE(l.columnStart == 6u);

    p1.skipWhiteSpace();

    REQUIRE(p1.atEnd());

    // Comment start delimiter as the very last character of the input
    std::stringstream s2("test;");
    tokenize::Tokenizer<> p2("input", s2);

    p2.removeComments(";", "\n", false);

    REQUIRE_NOTHROW(p2.expect<std::string>("test"));

    p2.skipWhiteSpace();

    REQUIRE(p2.atEnd());

    // Block comments, the last of which is opened but never closed
    std::stringstream s3("/* comment at start */ test1 /* comment in between */ test2 /*");
    tokenize::Tokenizer<> p3("input", s3);

    p3.removeComments("/*", "*/", true);

    REQUIRE_NOTHROW(p3.expect<std::string>("test1"));
    REQUIRE_NOTHROW(p3.expect<std::string>("test2"));

    p3.skipWhiteSpace();

    REQUIRE(p3.atEnd());
}