From 046c27578e828f190a7a75626a2ab96a9cab50b2 Mon Sep 17 00:00:00 2001
From: Robin Linden
Date: Sun, 19 Nov 2023 04:06:08 +0100
Subject: [PATCH] html2: Add test for tokenizing replaced control characters

---
 html2/tokenizer_test.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/html2/tokenizer_test.cpp b/html2/tokenizer_test.cpp
index 6855031d..61ced6cb 100644
--- a/html2/tokenizer_test.cpp
+++ b/html2/tokenizer_test.cpp
@@ -1218,6 +1218,13 @@ int main() {
         expect_token(tokens, EndOfFileToken{});
     });
 
+    etest::test("numeric character reference, control with replacement", [] {
+        auto tokens = run_tokenizer("&#x8a;");
+        expect_text(tokens, "\xc5\xa0"); // U+0160: LATIN CAPITAL LETTER S WITH CARON
+        expect_error(tokens, ParseError::ControlCharacterReference);
+        expect_token(tokens, EndOfFileToken{});
+    });
+
     etest::test("numeric character reference, no digits", [] {
         auto tokens = run_tokenizer("&#b;");
         expect_text(tokens, "&#b;");