diff --git a/lib/tokenize/include/tokenize/Tokenizer.h b/lib/tokenize/include/tokenize/Tokenizer.h
index 5be10ec..b24128d 100644
--- a/lib/tokenize/include/tokenize/Tokenizer.h
+++ b/lib/tokenize/include/tokenize/Tokenizer.h
@@ -315,17 +315,39 @@ void Tokenizer::removeComments(const std::string &startSequence
 	m_position = 0;
 
+	// TODO: refactor
 	while (!atEnd())
 	{
-		while (!atEnd() && !testAndSkip(startSequence))
+		bool startSequenceFound = false;
+
+		while (!atEnd())
+		{
+			if ((startSequenceFound = testAndSkip(startSequence)))
+				break;
+
 			advance();
+		}
 
-		auto startPosition = m_position - startSequence.size();
+		if (!startSequenceFound && atEnd())
+			break;
+
+		const auto startPosition = m_position - startSequence.size();
+
+		bool endSequenceFound = false;
+
+		while (!atEnd())
+		{
+			if ((endSequenceFound = testAndSkip(endSequence)))
+				break;
 
-		while (!atEnd() && !testAndSkip(endSequence))
 			advance();
+		}
 
-		auto endPosition = (removeEnd) ? m_position : m_position - endSequence.size();
+		// If the end sequence is to be removed or could not be found, remove entire range
+		const auto endPosition =
+			(removeEnd || !endSequenceFound)
+			? m_position
+			: m_position - endSequence.size();
 
 		removeRange(startPosition, endPosition);
diff --git a/lib/tokenize/tests/TestTokenizer.cpp b/lib/tokenize/tests/TestTokenizer.cpp
index a23bc64..0318426 100644
--- a/lib/tokenize/tests/TestTokenizer.cpp
+++ b/lib/tokenize/tests/TestTokenizer.cpp
@@ -329,4 +329,16 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
 	p3.skipWhiteSpace();
 
 	REQUIRE(p3.atEnd());
+
+	// Check that if there are no comments, the end is not accidentally truncated
+	std::stringstream s4("test foo bar");
+	tokenize::Tokenizer<> p4("input", s4);
+
+	p4.removeComments(";", "\n", false);
+
+	REQUIRE_NOTHROW(p4.expect("test"));
+	REQUIRE_NOTHROW(p4.expect("foo"));
+	REQUIRE_NOTHROW(p4.expect("bar"));
+
+	REQUIRE(p4.atEnd());
 }