Renamed tokenizing module for clarity.

2017-05-12 14:17:57 +02:00
parent c10187f6ba
commit e312a91632
77 changed files with 854 additions and 861 deletions

View File

@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.1)
project(tokenize)
option(TOKENIZE_BUILD_TESTS "Build unit tests" OFF)
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wpedantic -Werror")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
if (CMAKE_GENERATOR STREQUAL "Ninja" AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.5)))
# Force colored warnings in Ninja's output, if the compiler has -fdiagnostics-color support.
# Rationale in https://github.com/ninja-build/ninja/issues/814
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
endif()
add_subdirectory(src)
if(TOKENIZE_BUILD_TESTS)
add_subdirectory(tests)
endif()
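# A typical out-of-source configuration might look as follows (illustrative;
# any generator works, Ninja is shown because of the color handling above):
#
#   cmake -G Ninja -DTOKENIZE_BUILD_TESTS=ON <path to source>
#   ninja
#   ninja run-tokenize-tests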

View File

@@ -0,0 +1,31 @@
#ifndef TOKENIZE_LOCATION_H
#define TOKENIZE_LOCATION_H
#include <cstdlib>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Location
//
////////////////////////////////////////////////////////////////////////////////////////////////////
struct Location
{
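// Names of the enclosing sections plus 1-based row/column coordinates;
// std::size_t(-1) wraps to the maximum value and stands for "not yet known"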
const char *sectionStart = nullptr;
const char *sectionEnd = nullptr;
std::size_t rowStart = -1;
std::size_t rowEnd = -1;
std::size_t columnStart = -1;
std::size_t columnEnd = -1;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif

View File

@@ -0,0 +1,66 @@
#ifndef TOKENIZE_STREAM_H
#define TOKENIZE_STREAM_H
#include <experimental/filesystem>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
#include <tokenize/Location.h>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Stream
//
////////////////////////////////////////////////////////////////////////////////////////////////////
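// A minimal usage sketch (assumes a regular file "input.txt" exists; sections
// appended by multiple read() calls are tracked for error locations):
//
//   tokenize::Stream stream;
//   stream.read("input.txt");
//   while (true)
//   {
//       const char character = stream.currentCharacter(); // peek only
//       if (stream.atEnd())                               // peeking past the end flags the stream
//           break;
//       // ... use character ...
//       stream.advance();
//   }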
class Stream
{
public:
using Position = std::stringstream::pos_type;
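// A delimiter marks the position where a named section (for instance, one
// input file) starts within the concatenated stream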
struct Delimiter
{
Position position;
std::string sectionName;
};
public:
Stream();
explicit Stream(std::string streamName, std::istream &istream);
~Stream() = default;
Stream(const Stream &other) = delete;
Stream &operator=(const Stream &other) = delete;
Stream(Stream &&other) = default;
Stream &operator=(Stream &&other) = default;
void read(std::string streamName, std::istream &istream);
void read(const std::experimental::filesystem::path &path);
void reset();
void seek(Position position);
Position position() const;
Location location() const;
char currentCharacter() const;
void advance();
bool atEnd() const;
void check() const;
protected:
mutable std::stringstream m_stream;
std::vector<Delimiter> m_delimiters;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif

View File

@@ -0,0 +1,568 @@
#ifndef TOKENIZE_TOKENIZER_H
#define TOKENIZE_TOKENIZER_H
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>
#include <tokenize/TokenizerException.h>
#include <tokenize/TokenizerPolicy.h>
#include <tokenize/Stream.h>
namespace tokenize
{
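// Empty tag type used to dispatch get<Type>() to the matching getImpl overload
// (function templates cannot be partially specialized; tag dispatch avoids that)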
template<typename Type>
struct Tag
{
};
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Tokenizer
//
////////////////////////////////////////////////////////////////////////////////////////////////////
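// A minimal usage sketch (mirroring the unit tests; the stream name "input" is
// arbitrary and only shows up in error locations):
//
//   std::stringstream input("identifier  5");
//   tokenize::Tokenizer<> tokenizer("input", input);
//   const auto word = tokenizer.get<std::string>(); // "identifier"
//   const auto number = tokenizer.get<uint32_t>();  // 5
//   tokenizer.expect<std::string>("end");           // throws TokenizerException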
template<class TokenizerPolicy = CaseSensitiveTokenizerPolicy>
class Tokenizer: public Stream, public TokenizerPolicy
{
template<class OtherTokenizerPolicy>
friend class Tokenizer;
public:
explicit Tokenizer();
explicit Tokenizer(std::string streamName, std::istream &istream);
template<class OtherTokenizer>
Tokenizer(OtherTokenizer &&otherTokenizer)
{
m_stream = std::move(otherTokenizer.m_stream);
m_delimiters = std::move(otherTokenizer.m_delimiters);
}
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
char currentCharacter() const;
template<typename Type>
Type get();
template<typename Type>
bool testAndReturn(const Type &expectedValue);
template<typename Type>
bool testAndSkip(const Type &expectedValue);
template<typename Type>
void expect(const Type &expectedValue);
// TODO: refactor
std::string getIdentifier();
bool testIdentifierAndReturn(const std::string &identifier);
bool testIdentifierAndSkip(const std::string &identifier);
// TODO: remove
bool probeNumber();
std::string getLine();
void skipWhiteSpace();
void skipBlankSpace();
void skipLine();
private:
std::string getImpl(Tag<std::string>);
char getImpl(Tag<char>);
uint64_t getImpl(Tag<uint64_t>);
int64_t getImpl(Tag<int64_t>);
uint32_t getImpl(Tag<uint32_t>);
int32_t getImpl(Tag<int32_t>);
bool getImpl(Tag<bool>);
bool testImpl(const std::string &expectedValue);
bool testImpl(char expectedValue);
bool testImpl(uint64_t expectedValue);
bool testImpl(int64_t expectedValue);
bool testImpl(uint32_t expectedValue);
bool testImpl(int32_t expectedValue);
bool testImpl(bool expectedValue);
uint64_t getIntegerBody();
};
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
Tokenizer<TokenizerPolicy>::Tokenizer()
: Stream()
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
Tokenizer<TokenizerPolicy>::Tokenizer(std::string streamName, std::istream &istream)
: Stream(streamName, istream)
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isWhiteSpaceCharacter(currentCharacter()))
advance();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipBlankSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isBlankCharacter(currentCharacter()))
advance();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipLine()
{
check();
while (true)
{
const auto character = currentCharacter(); // peeking past the end also sets the end flag
if (atEnd())
return;
advance();
if (character == '\n')
return;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
template<typename Type>
Type Tokenizer<TokenizerPolicy>::get()
{
return getImpl(Tag<Type>());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
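// Tests for expectedValue without consuming input: the previous position is
// restored in every case, and only the result of the comparison is returned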
template<class TokenizerPolicy>
template<typename Type>
bool Tokenizer<TokenizerPolicy>::testAndReturn(const Type &expectedValue)
{
const auto previousPosition = position();
const auto result = testImpl(expectedValue);
seek(previousPosition);
return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
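// Tests for expectedValue and consumes it on success; on failure, the position
// is restored as if nothing had been read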
template<class TokenizerPolicy>
template<typename Type>
bool Tokenizer<TokenizerPolicy>::testAndSkip(const Type &expectedValue)
{
const auto previousPosition = position();
const auto result = testImpl(expectedValue);
if (result == false)
seek(previousPosition);
return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
template<typename Type>
void Tokenizer<TokenizerPolicy>::expect(const Type &expectedValue)
{
if (testAndSkip(expectedValue))
return;
std::stringstream message;
message << "unexpected value, expected “" << expectedValue << "";
throw TokenizerException(location(), message.str());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
std::string Tokenizer<TokenizerPolicy>::getIdentifier()
{
skipWhiteSpace();
std::string value;
while (true)
{
const auto character = currentCharacter();
if (atEnd() || !TokenizerPolicy::isIdentifierCharacter(character))
break;
value.push_back(character);
advance();
}
return value;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testIdentifierAndReturn(const std::string &identifier)
{
const auto previousPosition = position();
const auto result = testIdentifierAndSkip(identifier);
seek(previousPosition);
return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testIdentifierAndSkip(const std::string &expectedValue)
{
const auto previousPosition = position();
// The expected value must end here, not merely be a prefix of a longer identifier
if (!testAndSkip(expectedValue) || (!atEnd() && TokenizerPolicy::isIdentifierCharacter(currentCharacter())))
{
seek(previousPosition);
return false;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::probeNumber()
{
const auto previousPosition = position();
skipWhiteSpace();
while (true)
{
const auto character = currentCharacter();
if (atEnd() || TokenizerPolicy::isWhiteSpaceCharacter(character))
break;
if (!std::isdigit(static_cast<unsigned char>(character)))
{
seek(previousPosition);
return false;
}
advance();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
std::string Tokenizer<TokenizerPolicy>::getLine()
{
std::string value;
while (true)
{
const auto character = currentCharacter();
if (atEnd())
break;
advance();
if (character == '\n')
break;
else if (character == '\r')
continue;
value.push_back(character);
}
return value;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
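// Overwrites all comments (delimited by startSequence and endSequence) with
// spaces, so that the stream positions and locations of the remaining tokens
// stay stable; if removeEnd is set, the end sequence itself is blanked as well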
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
{
const auto inPosition = m_stream.tellg();
const auto outPosition = m_stream.tellp();
m_stream.seekg(0);
const auto removeRange =
[&](const auto &start, const auto &end)
{
assert(start != -1);
m_stream.clear();
m_stream.seekp(start);
m_stream.seekg(start);
auto position = start;
while (end == -1 || position < end)
{
m_stream.ignore(1);
if (atEnd())
return;
m_stream.put(' ');
position += static_cast<std::streamoff>(1);
}
};
while (!atEnd())
{
Position startPosition = m_stream.tellg();
while (!atEnd())
{
startPosition = m_stream.tellg();
if (testAndSkip(startSequence))
break;
advance();
}
Position endPosition = m_stream.tellg();
while (!atEnd())
{
endPosition = m_stream.tellg();
if (testAndSkip(endSequence))
break;
advance();
}
if (removeEnd)
endPosition = m_stream.tellg();
removeRange(startPosition, endPosition);
}
m_stream.clear();
m_stream.seekg(inPosition);
m_stream.seekp(outPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
char Tokenizer<TokenizerPolicy>::currentCharacter() const
{
return TokenizerPolicy::transformCharacter(Stream::currentCharacter());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
std::string Tokenizer<TokenizerPolicy>::getImpl(Tag<std::string>)
{
skipWhiteSpace();
const auto startPosition = position();
while (!TokenizerPolicy::isWhiteSpaceCharacter(currentCharacter()))
advance();
const auto endPosition = position();
const auto length = static_cast<size_t>(endPosition - startPosition);
std::string value;
value.reserve(length);
seek(startPosition);
for (size_t i = 0; i < length; i++)
{
value.push_back(currentCharacter());
advance();
}
return value;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
char Tokenizer<TokenizerPolicy>::getImpl(Tag<char>)
{
const auto value = currentCharacter();
advance();
return value;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
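// Reads a nonnegative decimal integer digit by digit (value = 10 * value + digit),
// stopping at the first nondigit character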
template<class TokenizerPolicy>
uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
{
check();
if (!std::isdigit(static_cast<unsigned char>(currentCharacter())))
throw TokenizerException(location(), "could not read integer value");
uint64_t value = 0;
while (!atEnd())
{
const auto character = currentCharacter();
if (!std::isdigit(static_cast<unsigned char>(character)))
break;
value *= 10;
value += character - '0';
advance();
}
return value;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
int64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<int64_t>)
{
skipWhiteSpace();
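// A consumed leading '+' means positive; otherwise, a consumed '-' means negative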
bool positive = testAndSkip<char>('+') || !testAndSkip<char>('-');
const auto value = getIntegerBody();
return (positive ? value : -value);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
uint64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint64_t>)
{
skipWhiteSpace();
if (currentCharacter() == '-')
throw TokenizerException(location(), "expected unsigned integer, got signed one");
return getIntegerBody();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
int32_t Tokenizer<TokenizerPolicy>::getImpl(Tag<int32_t>)
{
return static_cast<int32_t>(getImpl(Tag<int64_t>()));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
uint32_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint32_t>)
{
return static_cast<uint32_t>(getImpl(Tag<uint64_t>()));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::getImpl(Tag<bool>)
{
skipWhiteSpace();
if (testAndSkip<char>('0'))
return false;
if (testAndSkip<char>('1'))
return true;
throw TokenizerException(location(), "could not read Boolean value");
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(const std::string &expectedValue)
{
if (!TokenizerPolicy::isWhiteSpaceCharacter(expectedValue.front()))
skipWhiteSpace();
const auto match = std::find_if(expectedValue.cbegin(), expectedValue.cend(),
[&](const auto &expectedCharacter)
{
const auto character = static_cast<char>(this->currentCharacter());
if (character != expectedCharacter)
return true;
this->advance();
return false;
});
return (match == expectedValue.cend());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(char expectedValue)
{
const auto result = (currentCharacter() == expectedValue);
advance();
return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(int64_t expectedValue)
{
const auto value = getImpl(Tag<int64_t>());
return (value == expectedValue);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(uint64_t expectedValue)
{
const auto value = getImpl(Tag<uint64_t>());
return (value == expectedValue);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(int32_t expectedValue)
{
return testImpl(static_cast<int64_t>(expectedValue));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(uint32_t expectedValue)
{
return testImpl(static_cast<uint64_t>(expectedValue));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
bool Tokenizer<TokenizerPolicy>::testImpl(bool expectedValue)
{
const auto value = getImpl(Tag<bool>());
return (value == expectedValue);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif

View File

@@ -0,0 +1,67 @@
#ifndef TOKENIZE_TOKENIZER_EXCEPTION_H
#define TOKENIZE_TOKENIZER_EXCEPTION_H
#include <exception>
#include <string>
#include <tokenize/Location.h>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// TokenizerException
//
////////////////////////////////////////////////////////////////////////////////////////////////////
class TokenizerException: public std::exception
{
public:
explicit TokenizerException(const Location &location)
: TokenizerException(location, "unspecified tokenizer error")
{
}
explicit TokenizerException(const Location &location, const char *message)
: TokenizerException(location, static_cast<std::string>(message))
{
}
explicit TokenizerException(const Location &location, const std::string &message)
: m_location{location},
m_message{message},
// TODO: refactor; guard against default-constructed locations without a section name
m_plainMessage{std::string(m_location.sectionStart != nullptr ? m_location.sectionStart : "")
+ ":" + std::to_string(m_location.rowStart) + ":" + std::to_string(m_location.columnStart)
+ " " + m_message}
{
}
~TokenizerException() noexcept = default;
const char *what() const noexcept override
{
return m_plainMessage.c_str();
}
const Location &location() const
{
return m_location;
}
const std::string &message() const
{
return m_message;
}
private:
Location m_location;
std::string m_message;
std::string m_plainMessage;
};
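// Typical usage (sketch): catch by const reference and report the preformatted
// "<section>:<row>:<column> <message>" string:
//
//   try
//   {
//       tokenizer.expect<std::string>("expected");
//   }
//   catch (const tokenize::TokenizerException &exception)
//   {
//       std::cerr << exception.what() << std::endl;
//   }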
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif

View File

@@ -0,0 +1,67 @@
#ifndef TOKENIZE_TOKENIZER_POLICY_H
#define TOKENIZE_TOKENIZER_POLICY_H
#include <cctype>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// TokenizerPolicy
//
////////////////////////////////////////////////////////////////////////////////////////////////////
struct CaseSensitiveTokenizerPolicy
{
static constexpr char transformCharacter(char c) noexcept
{
return c;
}
static bool isWhiteSpaceCharacter(char c)
{
return std::isspace(static_cast<unsigned char>(c));
}
static bool isBlankCharacter(char c)
{
return std::isblank(static_cast<unsigned char>(c));
}
static bool isIdentifierCharacter(char c)
{
return std::isgraph(static_cast<unsigned char>(c));
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////
struct CaseInsensitiveTokenizerPolicy
{
static char transformCharacter(char c) noexcept
{
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}
static bool isWhiteSpaceCharacter(char c)
{
return std::isspace(static_cast<unsigned char>(c));
}
static bool isBlankCharacter(char c)
{
return std::isblank(static_cast<unsigned char>(c));
}
static bool isIdentifierCharacter(char c)
{
return std::isgraph(static_cast<unsigned char>(c));
}
};
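// Policies are stateless collections of static functions, so a custom policy
// (hypothetical example) only needs to provide the same four members:
//
//   struct UpperCaseTokenizerPolicy
//   {
//       static char transformCharacter(char c) noexcept
//       {
//           return static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
//       }
//       // isWhiteSpaceCharacter, isBlankCharacter, and isIdentifierCharacter
//       // as in the policies above
//   };
//
//   tokenize::Tokenizer<UpperCaseTokenizerPolicy> tokenizer;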
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif

View File

@@ -0,0 +1,21 @@
set(target tokenize)
file(GLOB core_sources "tokenize/*.cpp")
file(GLOB core_headers "../include/tokenize/*.h")
set(includes
${PROJECT_SOURCE_DIR}/include
)
set(sources
${core_sources}
${core_headers}
)
set(libraries
stdc++fs
)
add_library(${target} ${sources})
target_include_directories(${target} PRIVATE ${includes})
target_link_libraries(${target} ${libraries})

View File

@@ -0,0 +1,162 @@
#include <tokenize/Stream.h>
#include <algorithm>
#include <cctype>
#include <clocale>
#include <fstream>
#include <tokenize/TokenizerException.h>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Stream
//
////////////////////////////////////////////////////////////////////////////////////////////////////
Stream::Stream()
{
// Use the "C" locale so that numbers are read consistently
std::setlocale(LC_NUMERIC, "C");
// Throw only on unrecoverable stream errors; end of stream is handled explicitly
m_stream.exceptions(std::istream::badbit);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
Stream::Stream(std::string streamName, std::istream &istream)
: Stream()
{
read(streamName, istream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::read(std::string streamName, std::istream &istream)
{
// Store position of new section
const auto position = m_stream.tellp();
m_delimiters.push_back({position, streamName});
m_stream << istream.rdbuf();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::read(const std::experimental::filesystem::path &path)
{
if (!std::experimental::filesystem::is_regular_file(path))
throw std::runtime_error("File does not exist: “" + path.string() + "");
std::ifstream fileStream(path.string(), std::ios::in);
read(path.string(), fileStream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::reset()
{
m_stream.clear();
seek(0);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::seek(Position position)
{
m_stream.clear();
m_stream.seekg(position);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
typename Stream::Position Stream::position() const
{
return m_stream.tellg();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
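// Recomputes the row and column character by character from the start of the
// current section on every call: positions are cheap to store, locations are not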
Location Stream::location() const
{
const auto currentPosition = position();
// Find current section
auto currentFile = std::find_if(m_delimiters.crbegin(), m_delimiters.crend(),
[&](const auto &fileDelimiter)
{
return currentPosition >= fileDelimiter.position;
});
// If the tokenizer is at the end of the stream, still count from the beginning of the last section
if (currentFile == m_delimiters.crend())
currentFile = m_delimiters.crbegin();
// Go back to beginning of section
m_stream.clear();
m_stream.seekg(currentFile->position);
size_t row = 1;
size_t column = 1;
// Compute the location character by character
while (true)
{
if (currentPosition == -1 && atEnd())
break;
else if (currentPosition >= 0 && position() >= currentPosition)
break;
const auto character = currentCharacter();
if (character == '\n')
{
row++;
column = 1;
}
else if (std::isblank(static_cast<unsigned char>(character)) || std::isprint(static_cast<unsigned char>(character)))
column++;
m_stream.ignore(1);
}
return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
}
////////////////////////////////////////////////////////////////////////////////////////////////////
char Stream::currentCharacter() const
{
return m_stream.peek();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
bool Stream::atEnd() const
{
return position() == -1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::check() const
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
if (m_stream.fail())
throw TokenizerException(location());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::advance()
{
check();
m_stream.ignore(1);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@@ -0,0 +1,21 @@
set(target tokenize-tests)
file(GLOB core_sources "*.cpp")
set(includes
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/../../lib/catch/single_include
)
set(libraries
tokenize
)
add_executable(${target} ${core_sources})
target_include_directories(${target} PRIVATE ${includes})
target_link_libraries(${target} ${libraries})
add_custom_target(run-tokenize-tests
COMMAND ${CMAKE_BINARY_DIR}/bin/tokenize-tests
DEPENDS ${target}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)

View File

@@ -0,0 +1,332 @@
#include <catch.hpp>
#include <tokenize/Tokenizer.h>
#include <tokenize/TokenizerException.h>
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] Simple strings are tokenized correctly", "[tokenizer]")
{
std::stringstream s(" identifier 5 \n-51\t 0 1 100 200 -300 -400");
tokenize::Tokenizer<> p("input", s);
REQUIRE(p.get<std::string>() == "identifier");
REQUIRE(p.get<size_t>() == 5u);
REQUIRE(p.get<int>() == -51);
REQUIRE(p.get<bool>() == false);
REQUIRE(p.get<bool>() == true);
REQUIRE(p.get<int>() == 100);
REQUIRE(p.get<size_t>() == 200u);
REQUIRE(p.get<int>() == -300);
REQUIRE_THROWS_AS(p.get<size_t>(), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenizer]")
{
std::stringstream s(" identifier 5 \n-51\t 0 1 100 200 -300 -400");
tokenize::Tokenizer<> p("input", s);
REQUIRE_NOTHROW(p.expect<std::string>("identifier"));
REQUIRE_NOTHROW(p.expect<size_t>(5u));
REQUIRE_NOTHROW(p.expect<int>(-51));
REQUIRE_NOTHROW(p.expect<bool>(false));
REQUIRE_NOTHROW(p.expect<bool>(true));
REQUIRE_NOTHROW(p.expect<int>(100));
REQUIRE_NOTHROW(p.expect<size_t>(200u));
REQUIRE_NOTHROW(p.expect<int>(-300));
REQUIRE_THROWS_AS(p.expect<size_t>(-400), tokenize::TokenizerException);
p.seek(0);
REQUIRE_THROWS_AS(p.expect<std::string>("error"), tokenize::TokenizerException);
p.seek(14);
REQUIRE_THROWS_AS(p.expect<size_t>(6u), tokenize::TokenizerException);
p.seek(17);
REQUIRE_THROWS_AS(p.expect<int>(-50), tokenize::TokenizerException);
p.seek(24);
REQUIRE_THROWS_AS(p.expect<bool>(true), tokenize::TokenizerException);
p.seek(26);
REQUIRE_THROWS_AS(p.expect<bool>(false), tokenize::TokenizerException);
p.seek(28);
REQUIRE_THROWS_AS(p.expect<int>(101), tokenize::TokenizerException);
p.seek(31);
REQUIRE_THROWS_AS(p.expect<size_t>(201), tokenize::TokenizerException);
p.seek(34);
REQUIRE_THROWS_AS(p.expect<int>(-299), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] While tokenizing, the cursor position is as expected", "[tokenizer]")
{
std::stringstream s(" identifier 5 \n-51\t 0 1");
tokenize::Tokenizer<> p("input", s);
tokenize::Tokenizer<>::Position pos;
pos = p.position();
REQUIRE(p.testAndReturn<std::string>("error") == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<std::string>("identifier") == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<std::string>("error") == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<std::string>("identifier") == true);
REQUIRE(p.position() == 12);
pos = p.position();
REQUIRE(p.testAndReturn<size_t>(6u) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<size_t>(5u) == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(6u) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(5u) == true);
REQUIRE(p.position() == 15);
pos = p.position();
REQUIRE(p.testAndReturn<int>(-50) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<int>(-51) == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<int>(-50) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<int>(-51) == true);
REQUIRE(p.position() == 22);
pos = p.position();
REQUIRE(p.testAndReturn<bool>(true) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<bool>(false) == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(true) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(false) == true);
REQUIRE(p.position() == 25);
pos = p.position();
REQUIRE(p.testAndReturn<bool>(false) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<bool>(true) == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(false) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(true) == true);
REQUIRE(p.position() == 27);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] The end of the input stream is correctly handled", "[tokenizer]")
{
std::stringstream s1("test");
tokenize::Tokenizer<> p1("input", s1);
REQUIRE_NOTHROW(p1.expect<std::string>("test"));
REQUIRE_THROWS_AS(p1.get<std::string>(), tokenize::TokenizerException);
std::stringstream s2("test1 test2 test3");
tokenize::Tokenizer<> p2("input", s2);
REQUIRE_NOTHROW(p2.expect<std::string>("test1"));
REQUIRE_NOTHROW(p2.expect<std::string>("test2"));
REQUIRE_NOTHROW(p2.expect<std::string>("test3"));
REQUIRE_THROWS_AS(p2.get<std::string>(), tokenize::TokenizerException);
std::stringstream s3("-127");
tokenize::Tokenizer<> p3("input", s3);
p3.expect<int>(-127);
REQUIRE_THROWS_AS(p3.get<int>(), tokenize::TokenizerException);
std::stringstream s4("128 -1023 -4095");
tokenize::Tokenizer<> p4("input", s4);
REQUIRE_NOTHROW(p4.expect<size_t>(128));
REQUIRE_NOTHROW(p4.expect<int>(-1023));
REQUIRE_NOTHROW(p4.expect<int>(-4095));
REQUIRE_THROWS_AS(p4.get<int>(), tokenize::TokenizerException);
std::stringstream s5("0");
tokenize::Tokenizer<> p5("input", s5);
p5.expect<bool>(false);
REQUIRE_THROWS_AS(p5.get<bool>(), tokenize::TokenizerException);
std::stringstream s6("0 1 0");
tokenize::Tokenizer<> p6("input", s6);
REQUIRE_NOTHROW(p6.expect<bool>(false));
REQUIRE_NOTHROW(p6.expect<bool>(true));
REQUIRE_NOTHROW(p6.expect<bool>(false));
REQUIRE_THROWS_AS(p6.get<bool>(), tokenize::TokenizerException);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] While tokenizing, the cursor location is as expcected", "[tokenizer]")
{
std::stringstream s("123 \n4\ntest1\n test2\ntest3 \ntest4\n\n\n\n");
tokenize::Tokenizer<> p("input", s);
const auto startPosition = p.position();
tokenize::Location l;
l = p.location();
REQUIRE(l.rowStart == 1u);
REQUIRE(l.columnStart == 1u);
REQUIRE(p.currentCharacter() == '1');
REQUIRE_NOTHROW(p.advance());
l = p.location();
REQUIRE(l.rowStart == 1u);
REQUIRE(l.columnStart == 2u);
REQUIRE(p.currentCharacter() == '2');
REQUIRE_NOTHROW(p.advance());
l = p.location();
REQUIRE(l.rowStart == 1u);
REQUIRE(l.columnStart == 3u);
REQUIRE(p.currentCharacter() == '3');
REQUIRE_NOTHROW(p.advance());
l = p.location();
REQUIRE(l.rowStart == 1u);
REQUIRE(l.columnStart == 4u);
REQUIRE(p.currentCharacter() == ' ');
REQUIRE_NOTHROW(p.advance());
l = p.location();
REQUIRE(l.rowStart == 1u);
REQUIRE(l.columnStart == 5u);
REQUIRE(p.currentCharacter() == '\n');
REQUIRE_NOTHROW(p.advance());
l = p.location();
REQUIRE(l.rowStart == 2u);
REQUIRE(l.columnStart == 1u);
REQUIRE(p.currentCharacter() == '4');
REQUIRE_NOTHROW(p.advance());
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
l = p.location();
REQUIRE(l.rowStart == 3u);
REQUIRE(l.columnStart == 6u);
REQUIRE_NOTHROW(p.expect<std::string>("test2"));
l = p.location();
REQUIRE(l.rowStart == 4u);
REQUIRE(l.columnStart == 7u);
REQUIRE_NOTHROW(p.expect<std::string>("test3"));
l = p.location();
REQUIRE(l.rowStart == 5u);
REQUIRE(l.columnStart == 6u);
REQUIRE_NOTHROW(p.skipLine());
l = p.location();
REQUIRE(l.rowStart == 6u);
REQUIRE(l.columnStart == 1u);
REQUIRE_NOTHROW(p.skipLine());
l = p.location();
REQUIRE(l.rowStart == 7u);
REQUIRE(l.columnStart == 1u);
REQUIRE_NOTHROW(p.skipWhiteSpace());
l = p.location();
REQUIRE(l.rowStart == 10u);
REQUIRE(l.columnStart == 1u);
REQUIRE(p.atEnd());
p.reset();
REQUIRE(p.position() == startPosition);
REQUIRE_FALSE(p.atEnd());
for (size_t i = 0; i < 5; i++)
p.advance();
REQUIRE(p.position() == static_cast<std::istream::pos_type>(5));
p.seek(static_cast<std::istream::pos_type>(7));
REQUIRE(p.position() == static_cast<std::istream::pos_type>(7));
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
// TODO: test tokenizer with multiple sections
}
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
{
std::stringstream s1("; comment at beginning\ntest1; comment in between\ntest2; comment at end");
tokenize::Tokenizer<> p1("input", s1);
p1.removeComments(";", "\n", false);
tokenize::Location l;
REQUIRE_NOTHROW(p1.expect<std::string>("test1"));
l = p1.location();
REQUIRE(l.rowStart == 2u);
REQUIRE(l.columnStart == 6u);
REQUIRE_NOTHROW(p1.expect<std::string>("test2"));
l = p1.location();
REQUIRE(l.rowStart == 3u);
REQUIRE(l.columnStart == 6u);
p1.skipWhiteSpace();
REQUIRE(p1.atEnd());
std::stringstream s2("test;");
tokenize::Tokenizer<> p2("input", s2);
p2.removeComments(";", "\n", false);
REQUIRE_NOTHROW(p2.expect<std::string>("test"));
p2.skipWhiteSpace();
REQUIRE(p2.atEnd());
std::stringstream s3("/* comment at start */ test1 /* comment in between */ test2 /*");
tokenize::Tokenizer<> p3("input", s3);
p3.removeComments("/*", "*/", true);
REQUIRE_NOTHROW(p3.expect<std::string>("test1"));
REQUIRE_NOTHROW(p3.expect<std::string>("test2"));
p3.skipWhiteSpace();
REQUIRE(p3.atEnd());
}

View File

@@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include <catch.hpp>