From bb7ff0c0c2d20ce795feaf9e9647b30ad739367a Mon Sep 17 00:00:00 2001 From: Robin Linden Date: Tue, 21 Jan 2025 21:55:33 +0100 Subject: [PATCH] css2: Handle escape sequences more correctly --- css2/tokenizer.cpp | 11 +++++++++++ css2/tokenizer.h | 1 + css2/tokenizer_test.cpp | 14 ++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/css2/tokenizer.cpp b/css2/tokenizer.cpp index 2b2b7ada..0f03a3e8 100644 --- a/css2/tokenizer.cpp +++ b/css2/tokenizer.cpp @@ -56,6 +56,8 @@ std::string_view to_string(ParseError e) { return "EofInEscapeSequence"; case ParseError::EofInString: return "EofInString"; + case ParseError::InvalidEscapeSequence: + return "InvalidEscapeSequence"; case ParseError::NewlineInString: return "NewlineInString"; } @@ -177,6 +179,15 @@ void Tokenizer::run() { case '[': emit(OpenSquareToken{}); continue; + case '\\': + if (is_valid_escape_sequence('\\', peek_input(0))) { + reconsume_in(State::IdentLike); + continue; + } + + emit(ParseError::InvalidEscapeSequence); + emit(DelimToken{'\\'}); + continue; case ']': emit(CloseSquareToken{}); continue; diff --git a/css2/tokenizer.h b/css2/tokenizer.h index d58b9115..d144a687 100644 --- a/css2/tokenizer.h +++ b/css2/tokenizer.h @@ -35,6 +35,7 @@ enum class ParseError : std::uint8_t { EofInComment, EofInEscapeSequence, EofInString, + InvalidEscapeSequence, NewlineInString, }; diff --git a/css2/tokenizer_test.cpp b/css2/tokenizer_test.cpp index 464333f9..c9a39924 100644 --- a/css2/tokenizer_test.cpp +++ b/css2/tokenizer_test.cpp @@ -316,6 +316,7 @@ int main() { s.add_test("at keyword start, but with bad escape", [](etest::IActions &a) { auto output = run_tokenizer(a, "@\\\n"); expect_token(output, DelimToken{'@'}); + expect_error(output, ParseError::InvalidEscapeSequence); expect_token(output, DelimToken{'\\'}); expect_token(output, WhitespaceToken{}); }); @@ -638,6 +639,19 @@ int main() { s.add_test("hash token: invalid escape", [](etest::IActions &a) { auto output = run_tokenizer(a, "#\\\n"); expect_token(output, DelimToken{'#'}); + expect_error(output, ParseError::InvalidEscapeSequence); + expect_token(output, DelimToken{'\\'}); + expect_token(output, WhitespaceToken{}); + }); + + s.add_test("\\: ident-like", [](etest::IActions &a) { + auto output = run_tokenizer(a, "\\Hallo"); + expect_token(output, IdentToken{"Hallo"}); + }); + + s.add_test("\\: invalid escape", [](etest::IActions &a) { + auto output = run_tokenizer(a, "\\\n"); + expect_error(output, ParseError::InvalidEscapeSequence); expect_token(output, DelimToken{'\\'}); expect_token(output, WhitespaceToken{}); });