-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
lib: disallow < in URLs when parsing HTML
Make sure that links placed verbatim inside HTML elements' bodies are not parsed together with adjacent HTML tags, as illustrated in the new test case. Also change the existing code to use the idiomatic Go way of getting set-like functionality.

Changelog-fixed: Parsed links in HTML message parts now do not include trailing HTML tags.
Signed-off-by: Karel Balej <[email protected]>
Tested-by: Jakub Růžička <[email protected]>
- Loading branch information
Showing
3 changed files
with
25 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ func TestHyperlinks(t *testing.T) { | |
name string | ||
text string | ||
links []string | ||
html bool | ||
}{ | ||
{ | ||
name: "http-link", | ||
|
@@ -48,6 +49,7 @@ func TestHyperlinks(t *testing.T) { | |
name: "https-link-in-html", | ||
text: "<a href=\"https://aerc-mail.org\">", | ||
links: []string{"https://aerc-mail.org"}, | ||
html: true, | ||
}, | ||
{ | ||
name: "https-link-twice", | ||
|
@@ -84,6 +86,12 @@ func TestHyperlinks(t *testing.T) { | |
text: "text https://www.ics.uci.edu/pub/ietf/uri/#Related more text", | ||
links: []string{"https://www.ics.uci.edu/pub/ietf/uri/#Related"}, | ||
}, | ||
{ | ||
name: "https-in-html", | ||
text: "<div>text https://example.com/test<br>https://test.org/?a=b</div><br> text", | ||
links: []string{"https://example.com/test", "https://test.org/?a=b"}, | ||
html: true, | ||
}, | ||
{ | ||
name: "https-with-query", | ||
text: "text https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1 more text", | ||
|
@@ -118,28 +126,32 @@ func TestHyperlinks(t *testing.T) { | |
name: "simple email in <a href>", | ||
text: `<a href="mailto:[email protected]" rel="noopener noreferrer">`, | ||
links: []string{"mailto:[email protected]"}, | ||
html: true, | ||
}, | ||
{ | ||
name: "simple email in <a> body", | ||
text: `<a href="#" rel="noopener noreferrer">[email protected]</a><br/><p>more text</p>`, | ||
links: []string{"mailto:[email protected]"}, | ||
html: true, | ||
}, | ||
{ | ||
name: "emails in <a> href and body", | ||
text: `<a href="mailto:[email protected]" rel="noopener noreferrer">[email protected]</a><br/><p>more text</p>`, | ||
links: []string{"mailto:[email protected]", "mailto:[email protected]"}, | ||
html: true, | ||
}, | ||
{ | ||
name: "email in <...>", | ||
text: `<div>01.02.2023, 10:11, "Firstname Lastname" <[email protected]>:</div>`, | ||
links: []string{"mailto:[email protected]"}, | ||
html: true, | ||
}, | ||
} | ||
|
||
for i, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
// make sure reader is exact copy of input reader | ||
reader, parsedLinks := parse.HttpLinks(strings.NewReader(test.text)) | ||
reader, parsedLinks := parse.HttpLinks(strings.NewReader(test.text), test.html) | ||
if _, err := io.ReadAll(reader); err != nil { | ||
t.Skipf("could not read text: %v", err) | ||
} | ||
|