Skip to content

Commit

Permalink
url: Move the ValidationError-enum out of the UrlParser
Browse files Browse the repository at this point in the history
  • Loading branch information
robinlinden committed Dec 17, 2023
1 parent 17c4730 commit 8cc56c5
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 71 deletions.
62 changes: 30 additions & 32 deletions url/url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,52 +47,50 @@ std::map<std::string, std::uint16_t> const special_schemes = {{"ftp", std::uint1
{"wss", std::uint16_t{443}}};

// NOLINTNEXTLINE(cert-err58-cpp)
std::map<UrlParser::ValidationError, std::string> const validation_error_str = {
{UrlParser::ValidationError::DomainToAscii, "Unicode ToASCII records an error or returns the empty string"},
{UrlParser::ValidationError::DomainToUnicode, "Unicode ToUnicode records an error"},
{UrlParser::ValidationError::DomainInvalidCodePoint, "The input's host contains a forbidden domain code point"},
{UrlParser::ValidationError::HostInvalidCodePoint,
std::map<ValidationError, std::string> const validation_error_str = {
{ValidationError::DomainToAscii, "Unicode ToASCII records an error or returns the empty string"},
{ValidationError::DomainToUnicode, "Unicode ToUnicode records an error"},
{ValidationError::DomainInvalidCodePoint, "The input's host contains a forbidden domain code point"},
{ValidationError::HostInvalidCodePoint,
"An opaque host (in a URL that is not special) contains a forbidden host code point"},
{UrlParser::ValidationError::IPv4EmptyPart, "An IPv4 address ends with a U+002E (.)"},
{UrlParser::ValidationError::IPv4TooManyParts, "An IPv4 address does not consist of exactly 4 parts"},
{UrlParser::ValidationError::IPv4NonNumericPart, "An IPv4 address part is not numeric"},
{UrlParser::ValidationError::IPv4NonDecimalPart,
{ValidationError::IPv4EmptyPart, "An IPv4 address ends with a U+002E (.)"},
{ValidationError::IPv4TooManyParts, "An IPv4 address does not consist of exactly 4 parts"},
{ValidationError::IPv4NonNumericPart, "An IPv4 address part is not numeric"},
{ValidationError::IPv4NonDecimalPart,
"The IPv4 address contains numbers expressed using hexadecimal or octal digits"},
{UrlParser::ValidationError::IPv4OutOfRangePart, "An IPv4 address part exceeds 255"},
{UrlParser::ValidationError::IPv6Unclosed, "An IPv6 address is missing the closing U+005D (])"},
{UrlParser::ValidationError::IPv6InvalidCompression, "An IPv6 address begins with improper compression"},
{UrlParser::ValidationError::IPv6TooManyPieces, "An IPv6 address contains more than 8 pieces"},
{UrlParser::ValidationError::IPv6MultipleCompression, "An IPv6 address is compressed in more than one spot"},
{UrlParser::ValidationError::IPv6InvalidCodePoint,
{ValidationError::IPv4OutOfRangePart, "An IPv4 address part exceeds 255"},
{ValidationError::IPv6Unclosed, "An IPv6 address is missing the closing U+005D (])"},
{ValidationError::IPv6InvalidCompression, "An IPv6 address begins with improper compression"},
{ValidationError::IPv6TooManyPieces, "An IPv6 address contains more than 8 pieces"},
{ValidationError::IPv6MultipleCompression, "An IPv6 address is compressed in more than one spot"},
{ValidationError::IPv6InvalidCodePoint,
"An IPv6 address contains a code point that is neither an ASCII hex digit nor a U+003A (:), or it "
"unexpectedly ends"},
{UrlParser::ValidationError::IPv6TooFewPieces, "An uncompressed IPv6 address contains fewer than 8 pieces"},
{UrlParser::ValidationError::IPv4InIPv6TooManyPieces,
{ValidationError::IPv6TooFewPieces, "An uncompressed IPv6 address contains fewer than 8 pieces"},
{ValidationError::IPv4InIPv6TooManyPieces,
"An IPv6 address with IPv4 address syntax: the IPv6 address has more than 6 pieces"},
{UrlParser::ValidationError::IPv4InIPv6InvalidCodePoint,
{ValidationError::IPv4InIPv6InvalidCodePoint,
"An IPv6 address with IPv4 address syntax: An IPv4 part is empty or contains a non-ASCII digit, an "
"IPv4 part contains a leading 0, or there are too many IPv4 parts"},
{UrlParser::ValidationError::IPv4InIPv6OutOfRangePart,
{ValidationError::IPv4InIPv6OutOfRangePart,
"An IPv6 address with IPv4 address syntax: an IPv4 part exceeds 255"},
{UrlParser::ValidationError::IPv4InIPv6TooFewParts,
{ValidationError::IPv4InIPv6TooFewParts,
"An IPv6 address with IPv4 address syntax: an IPv4 address contains too few parts"},
{UrlParser::ValidationError::InvalidUrlUnit, "A code point is found that is not a URL unit"},
{UrlParser::ValidationError::SpecialSchemeMissingFollowingSolidus,
"The input's scheme is not followed by \"//\""},
{UrlParser::ValidationError::MissingSchemeNonRelativeUrl,
{ValidationError::InvalidUrlUnit, "A code point is found that is not a URL unit"},
{ValidationError::SpecialSchemeMissingFollowingSolidus, "The input's scheme is not followed by \"//\""},
{ValidationError::MissingSchemeNonRelativeUrl,
"The input is missing a scheme, because it does not begin with an ASCII alpha, and either no base "
"URL was provided or the base URL cannot be used as a base URL because it has an opaque path"},
{UrlParser::ValidationError::InvalidReverseSolidus,
{ValidationError::InvalidReverseSolidus,
"The URL has a special scheme and it uses U+005C (\\) instead of U+002F (/)"},
{UrlParser::ValidationError::InvalidCredentials, "The input includes credentials"},
{UrlParser::ValidationError::HostMissing, "The input has a special scheme, but does not contain a host"},
{UrlParser::ValidationError::PortOutOfRange, "The input's port is too big"},
{UrlParser::ValidationError::PortInvalid, "The input's port is invalid"},
{UrlParser::ValidationError::FileInvalidWindowsDriveLetter,
{ValidationError::InvalidCredentials, "The input includes credentials"},
{ValidationError::HostMissing, "The input has a special scheme, but does not contain a host"},
{ValidationError::PortOutOfRange, "The input's port is too big"},
{ValidationError::PortInvalid, "The input's port is invalid"},
{ValidationError::FileInvalidWindowsDriveLetter,
"The input is a relative-URL string that starts with a Windows drive letter and the base URL's "
"scheme is \"file\""},
{UrlParser::ValidationError::FileInvalidWindowsDriveLetterHost,
"A file: URL's host is a Windows drive letter"}};
{ValidationError::FileInvalidWindowsDriveLetterHost, "A file: URL's host is a Windows drive letter"}};

struct PercentEncodeSet {
static constexpr bool c0_control(char c) {
Expand Down
70 changes: 35 additions & 35 deletions url/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,48 +109,48 @@ struct Url {
bool operator==(Url const &b) const { return serialize() == b.serialize(); }
};

// Validation errors that can be reported for a URL. The enumerators are grouped
// by the area they belong to: IDNA processing, host parsing, and URL parsing.
// UrlParser::set_on_error() accepts a callback that takes one of these values,
// and url.cpp keeps a human-readable description for each one in
// validation_error_str; the per-enumerator notes below mirror those descriptions.
enum class ValidationError {
// IDNA
// Unicode ToASCII records an error or returns the empty string.
DomainToAscii,
// Unicode ToUnicode records an error.
DomainToUnicode,
// Host parsing
// The input's host contains a forbidden domain code point.
DomainInvalidCodePoint,
// An opaque host (in a URL that is not special) contains a forbidden host code point.
HostInvalidCodePoint,
// An IPv4 address ends with a U+002E (.).
IPv4EmptyPart,
// An IPv4 address does not consist of exactly 4 parts.
IPv4TooManyParts,
// An IPv4 address part is not numeric.
IPv4NonNumericPart,
// The IPv4 address contains numbers expressed using hexadecimal or octal digits.
IPv4NonDecimalPart,
// An IPv4 address part exceeds 255.
IPv4OutOfRangePart,
// An IPv6 address is missing the closing U+005D (]).
IPv6Unclosed,
// An IPv6 address begins with improper compression.
IPv6InvalidCompression,
// An IPv6 address contains more than 8 pieces.
IPv6TooManyPieces,
// An IPv6 address is compressed in more than one spot.
IPv6MultipleCompression,
// An IPv6 address contains a code point that is neither an ASCII hex digit nor
// a U+003A (:), or it unexpectedly ends.
IPv6InvalidCodePoint,
// An uncompressed IPv6 address contains fewer than 8 pieces.
IPv6TooFewPieces,
// IPv6 address with IPv4 address syntax: the IPv6 address has more than 6 pieces.
IPv4InIPv6TooManyPieces,
// IPv6 address with IPv4 address syntax: an IPv4 part is empty or contains a
// non-ASCII digit, an IPv4 part contains a leading 0, or there are too many
// IPv4 parts.
IPv4InIPv6InvalidCodePoint,
// IPv6 address with IPv4 address syntax: an IPv4 part exceeds 255.
IPv4InIPv6OutOfRangePart,
// IPv6 address with IPv4 address syntax: an IPv4 address contains too few parts.
IPv4InIPv6TooFewParts,
// URL parsing
// A code point is found that is not a URL unit.
InvalidUrlUnit,
// The input's scheme is not followed by "//".
SpecialSchemeMissingFollowingSolidus,
// The input is missing a scheme, because it does not begin with an ASCII alpha,
// and either no base URL was provided or the base URL cannot be used as a base
// URL because it has an opaque path.
MissingSchemeNonRelativeUrl,
// The URL has a special scheme and it uses U+005C (reverse solidus) instead of
// U+002F (/).
InvalidReverseSolidus,
// The input includes credentials.
InvalidCredentials,
// The input has a special scheme, but does not contain a host.
HostMissing,
// The input's port is too big.
PortOutOfRange,
// The input's port is invalid.
PortInvalid,
// The input is a relative-URL string that starts with a Windows drive letter
// and the base URL's scheme is "file".
FileInvalidWindowsDriveLetter,
// A file: URL's host is a Windows drive letter.
FileInvalidWindowsDriveLetterHost
};

// This parser is current with the WHATWG URL specification as of 27 September 2023
class UrlParser final : util::BaseParser {
public:
UrlParser();

std::optional<Url> parse(std::string input, std::optional<Url> base = std::nullopt);

enum class ValidationError {
// IDNA
DomainToAscii,
DomainToUnicode,
// Host parsing
DomainInvalidCodePoint,
HostInvalidCodePoint,
IPv4EmptyPart,
IPv4TooManyParts,
IPv4NonNumericPart,
IPv4NonDecimalPart,
IPv4OutOfRangePart,
IPv6Unclosed,
IPv6InvalidCompression,
IPv6TooManyPieces,
IPv6MultipleCompression,
IPv6InvalidCodePoint,
IPv6TooFewPieces,
IPv4InIPv6TooManyPieces,
IPv4InIPv6InvalidCodePoint,
IPv4InIPv6OutOfRangePart,
IPv4InIPv6TooFewParts,
// URL parsing
InvalidUrlUnit,
SpecialSchemeMissingFollowingSolidus,
MissingSchemeNonRelativeUrl,
InvalidReverseSolidus,
InvalidCredentials,
HostMissing,
PortOutOfRange,
PortInvalid,
FileInvalidWindowsDriveLetter,
FileInvalidWindowsDriveLetterHost
};

void set_on_error(std::function<void(ValidationError)> on_error) { on_error_ = std::move(on_error); }

private:
Expand Down
8 changes: 4 additions & 4 deletions url/url_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ namespace {

struct ParseResult {
std::optional<url::Url> url;
std::vector<url::UrlParser::ValidationError> errors;
std::vector<url::ValidationError> errors;
};

ParseResult parse_url(std::string input, std::optional<url::Url> base = std::nullopt) {
std::vector<url::UrlParser::ValidationError> errors;
std::vector<url::ValidationError> errors;
url::UrlParser p;
p.set_on_error([&errors](url::UrlParser::ValidationError e) { errors.push_back(e); });
p.set_on_error([&errors](url::ValidationError e) { errors.push_back(e); });
std::optional<url::Url> url = p.parse(std::move(input), std::move(base));
return {std::move(url), std::move(errors)};
}
Expand Down Expand Up @@ -693,7 +693,7 @@ int main() {
etest::test("URL parsing: non-relative url w/o scheme", [] {
auto [url, errors] = parse_url("//example.com");
etest::expect_eq(url, std::nullopt);
etest::expect_eq(errors, std::vector{url::UrlParser::ValidationError::MissingSchemeNonRelativeUrl});
etest::expect_eq(errors, std::vector{url::ValidationError::MissingSchemeNonRelativeUrl});
});

etest::test("Web Platform Tests", [] {
Expand Down

0 comments on commit 8cc56c5

Please sign in to comment.