Skip to content

Commit

Permalink
html2: Improve support for parsing <table> elements
Browse files Browse the repository at this point in the history
  • Loading branch information
robinlinden committed Jan 6, 2025
1 parent 949edf6 commit 265f154
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 9 deletions.
14 changes: 13 additions & 1 deletion html/parser_actions.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2023-2025 Robin Lindén <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause

Expand Down Expand Up @@ -49,6 +49,18 @@ class Actions : public html2::IActions {
}();
}

// Translates the DOM document's mode into the parser-side QuirksMode enum.
// Any value outside the three known modes is reported as LimitedQuirks.
// Per the Codecov annotations on this change, neither the LimitedQuirks case
// nor the fallback is exercised by tests.
html2::QuirksMode quirks_mode() const override {
    using DomMode = dom::Document::Mode;

    // Start from the fallback so the switch only has to override it.
    auto translated = html2::QuirksMode::LimitedQuirks;
    switch (document_.mode) {
        case DomMode::NoQuirks:
            translated = html2::QuirksMode::NoQuirks;
            break;
        case DomMode::Quirks:
            translated = html2::QuirksMode::Quirks;
            break;
        case DomMode::LimitedQuirks:
            break;
    }

    return translated;
}

// Reports the value of the scripting_ member.
bool scripting() const override {
    return scripting_;
}

void insert_element_for(html2::StartTagToken const &token) override {
Expand Down
16 changes: 11 additions & 5 deletions html2/iparser_actions.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2023-2025 Robin Lindén <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause

Expand Down Expand Up @@ -31,6 +31,7 @@ class IActions {

// Hooks implemented by whoever owns the document being built; the insertion
// mode handlers only talk to the document through these.
virtual void set_doctype_name(std::string) = 0;
virtual void set_quirks_mode(QuirksMode) = 0;
// Read-side counterpart of set_quirks_mode().
virtual QuirksMode quirks_mode() const = 0;
virtual bool scripting() const = 0;
// Inserts an element for the given start tag token.
virtual void insert_element_for(html2::StartTagToken const &) = 0;
virtual void pop_current_node() = 0;
Expand Down Expand Up @@ -59,13 +60,13 @@ class IActions {
template<auto const &scope_elements>
bool has_element_in_scope_impl(std::string_view element_name) const {
// The name match must be tested before the scope-boundary test. Some targets
// are themselves boundaries (e.g. "table" when asking about table scope), and
// checking the boundary first would make those impossible to find.
// NOTE(review): this relies on names_of_open_elements() yielding the current
// node first -- confirm against the implementation.
for (auto const element : names_of_open_elements()) {
    if (element == element_name) {
        return true;
    }

    // Reached a scope boundary without seeing the target: not in scope.
    if (is_in_array<scope_elements>(element)) {
        return false;
    }
}

return false;

Check warning on line 72 in html2/iparser_actions.h

View check run for this annotation

Codecov / codecov/patch

html2/iparser_actions.h#L72

Added line #L72 was not covered by tests
Expand Down Expand Up @@ -97,6 +98,11 @@ class IActions {

return has_element_in_scope_impl<kScopeElements>(element_name);
}

// Table-scope variant of the scope lookup: delegates to
// has_element_in_scope_impl with html, table, and template as the elements
// that end the search.
bool has_element_in_table_scope(std::string_view element_name) const {
    static constexpr std::array<std::string_view, 3> kTableScopeBoundaries{"html", "table", "template"};
    return has_element_in_scope_impl<kTableScopeBoundaries>(element_name);
}
};

} // namespace html2
Expand Down
94 changes: 94 additions & 0 deletions html2/parser_states.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class InternalActions : public IActions {

// Plain pass-throughs: each of these forwards its arguments unchanged to the
// same-named member of wrapped_.
void set_doctype_name(std::string name) override { wrapped_.set_doctype_name(std::move(name)); }
void set_quirks_mode(QuirksMode quirks) override { wrapped_.set_quirks_mode(quirks); }
QuirksMode quirks_mode() const override { return wrapped_.quirks_mode(); }
bool scripting() const override { return wrapped_.scripting(); }
void insert_element_for(html2::StartTagToken const &token) override { wrapped_.insert_element_for(token); }
void pop_current_node() override { wrapped_.pop_current_node(); }
Expand Down Expand Up @@ -296,6 +297,37 @@ void generate_implied_end_tags(IActions &a, std::optional<std::string_view> exce
}
}

// https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
// Incomplete: picks an insertion mode from the first open element whose name
// is table, head, body, frameset, or html, and falls back to InBody when no
// open element matches.
// Per the Codecov annotations on this change, only the "body" case is
// exercised by tests.
InsertionMode appropriate_insertion_mode(IActions &a) {
    // Maps a single element name to the mode it selects, if any.
    auto const mode_for = [](auto const &name) -> std::optional<InsertionMode> {
        // TODO(robinlinden): Lots of table nonsense.
        if (name == "table") {
            return InsertionMode{InTable{}};
        }

        // TODO(robinlinden): Template nonsense. :(

        if (name == "head") {
            return InsertionMode{InHead{}};
        }

        if (name == "body") {
            return InsertionMode{InBody{}};
        }

        if (name == "frameset") {
            return InsertionMode{InFrameset{}};
        }

        if (name == "html") {
            // TODO(robinlinden): head element pointer.
            return InsertionMode{AfterHead{}};
        }

        return std::nullopt;
    };

    for (auto const name : a.names_of_open_elements()) {
        if (auto selected = mode_for(name); selected.has_value()) {
            return *selected;
        }
    }

    return InBody{};
}
} // namespace

// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
Expand Down Expand Up @@ -893,6 +925,16 @@ std::optional<InsertionMode> InBody::process(IActions &a, html2::Token const &to

// TODO(robinlinden): Most things.

if (start != nullptr && start->tag_name == "table") {
if (a.quirks_mode() != QuirksMode::Quirks && a.has_element_in_button_scope("p")) {
close_a_p_element();
}

a.insert_element_for(*start);
a.set_frameset_ok(false);
return InTable{};
}

static constexpr auto kImmediatelyPoppedElements = std::to_array<std::string_view>({
"area",
"br",
Expand Down Expand Up @@ -999,6 +1041,58 @@ std::optional<InsertionMode> Text::process(IActions &a, html2::Token const &toke
return {};
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
// Incomplete. Handled so far: comments and doctypes (dropped), </table>, a
// fixed list of end tags that are ignored, and the tags that are handed to
// InHead. Every other token is dropped.
// Per the Codecov annotations on this change, the "no table in table scope"
// early-out and the body of the pop-until-table loop are not covered by tests.
std::optional<InsertionMode> InTable::process(IActions &a, html2::Token const &token) {
    static constexpr auto kIgnoredEndTags = std::to_array<std::string_view>(
            {"body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"});
    static constexpr auto kInHeadStartTags = std::to_array<std::string_view>({"style", "script", "template"});

    // Shared by the start-tag and end-tag paths that defer to InHead.
    auto const process_using_in_head = [&a, &token] {
        auto mode_override = current_insertion_mode_override(a, InTable{});
        return InHead{}.process(mode_override, token);
    };

    // TODO(robinlinden): CharacterToken.

    if (std::holds_alternative<html2::CommentToken>(token)) {
        // TODO(robinlinden): Insert.
        return std::nullopt;
    }

    if (std::holds_alternative<html2::DoctypeToken>(token)) {
        // Parse error.
        return std::nullopt;
    }

    // TODO(robinlinden): Everything.

    if (auto const *end_tag = std::get_if<html2::EndTagToken>(&token); end_tag != nullptr) {
        if (end_tag->tag_name == "table") {
            if (!a.has_element_in_table_scope("table")) {
                // Parse error.
                return std::nullopt;
            }

            // Pop up to and including the nearest table element.
            for (;;) {
                bool const popping_table = a.current_node_name() == "table";
                a.pop_current_node();
                if (popping_table) {
                    break;
                }
            }

            return appropriate_insertion_mode(a);
        }

        if (is_in_array<kIgnoredEndTags>(end_tag->tag_name)) {
            // Parse error.
            return std::nullopt;
        }

        if (end_tag->tag_name == "template") {
            return process_using_in_head();
        }

        // TODO(robinlinden): Everything.
        return std::nullopt;
    }

    auto const *start_tag = std::get_if<html2::StartTagToken>(&token);
    if (start_tag != nullptr && is_in_array<kInHeadStartTags>(start_tag->tag_name)) {
        return process_using_in_head();
    }

    // TODO(robinlinden): Everything.

    return std::nullopt;
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
// Incomplete.
std::optional<InsertionMode> AfterBody::process(IActions &, html2::Token const &token) {
Expand Down
9 changes: 6 additions & 3 deletions html2/parser_states.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2023-2025 Robin Lindén <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause

Expand Down Expand Up @@ -46,7 +46,7 @@ using InsertionMode = std::variant<Initial,
AfterHead,
InBody,
Text,
// InTable,
InTable,
// InTableText,
// InCaption,
// InColumnGroup,
Expand All @@ -63,7 +63,6 @@ using InsertionMode = std::variant<Initial,
// AfterAfterFrameset
>;

// Placeholder types for insertion modes that have no process() yet.
// InTable is intentionally not listed here: it is defined in full (with a
// process() member) alongside the other implemented modes, and a second empty
// definition would be a redefinition error.
struct InTableText {};
struct InCaption {};
struct InColumnGroup {};
Expand Down Expand Up @@ -107,6 +106,10 @@ struct Text {
std::optional<InsertionMode> process(IActions &, html2::Token const &);
};

// The "in table" insertion mode.
// process() handles a single token; the returned optional presumably carries
// the insertion mode to switch to, with an empty optional meaning "no change"
// -- confirm against the caller.
struct InTable {
    std::optional<InsertionMode> process(IActions &actions, html2::Token const &token);
};

// The "after body" insertion mode.
// process() handles a single token; the returned optional presumably carries
// the insertion mode to switch to, with an empty optional meaning "no change"
// -- confirm against the caller.
struct AfterBody {
    std::optional<InsertionMode> process(IActions &actions, html2::Token const &token);
};
Expand Down
52 changes: 52 additions & 0 deletions html2/parser_states_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,24 @@ void in_body_tests(etest::Suite &s) {
a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"br"}}});
});

s.add_test("InBody: <table>", [](etest::IActions &a) {
auto res = parse("<body><table>", {});
auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table"}}});
});

s.add_test("InBody: <p><table>", [](etest::IActions &a) {
auto res = parse("<body><p><table>", {});
auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"p"}, dom::Element{"table"}}});
});

s.add_test("InBody: <p><table>, but quirky!", [](etest::IActions &a) {
auto res = parse("<!DOCTYPE><body><p><table>", {});
auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"p", {}, {dom::Element{"table"}}}}});
});

s.add_test("InBody: <template> doesn't crash", [](etest::IActions &) {
std::ignore = parse("<body><template>", {}); //
});
Expand Down Expand Up @@ -597,6 +615,39 @@ void in_body_tests(etest::Suite &s) {
});
}

// Tests for the (still incomplete) InTable insertion mode. Each test parses a
// small document and compares the resulting <body> element, which is read as
// the second child of <html>, against the expected tree.
void in_table_tests(etest::Suite &s) {
    s.add_test("InTable: comment", [](etest::IActions &a) {
        auto res = parse("<table><!-- comment -->", {});
        auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
        a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table"}}});
    });

    s.add_test("InTable: doctype", [](etest::IActions &a) {
        auto res = parse("<table><!doctype html>", {});
        auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
        a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table"}}});
    });

    // Named after the end tag that is actually fed to the parser.
    s.add_test("InTable: </html>", [](etest::IActions &a) {
        // This will break once we implement more table parsing: the expected
        // tree below has no tbody in it.
        auto res = parse("<table></html><tbody>", {});
        auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
        a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table"}}});
    });

    s.add_test("InTable: <style>", [](etest::IActions &a) {
        auto res = parse("<table><style>", {});
        auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
        a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table", {}, {dom::Element{"style"}}}}});
    });

    s.add_test("InTable: </table>", [](etest::IActions &a) {
        auto res = parse("<table></table>", {});
        auto const &body = std::get<dom::Element>(res.document.html().children.at(1));
        a.expect_eq(body, dom::Element{"body", {}, {dom::Element{"table"}}});
    });
}

void in_frameset_tests(etest::Suite &s) {
s.add_test("InFrameset: boring whitespace", [](etest::IActions &a) {
auto res = parse("<head></head><frameset> ", {});
Expand Down Expand Up @@ -705,6 +756,7 @@ int main() {
in_head_noscript_tests(s);
after_head_tests(s);
in_body_tests(s);
in_table_tests(s);
in_frameset_tests(s);
return s.run();
}

0 comments on commit 265f154

Please sign in to comment.