Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

httputil: Fix quadratic performance of cookie parsing #3447

Merged
merged 1 commit into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 10 additions & 28 deletions tornado/httputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,15 +1065,20 @@ def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
yield (k, v)


_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = "".join
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
if m[1]:
return chr(int(m[1], 8))
else:
return m[2]


def _unquote_cookie(s: str) -> str:
"""Handle double quotes and escaping in cookie values.

This method is copied verbatim from the Python 3.5 standard
This method is copied verbatim from the Python 3.13 standard
library (http.cookies._unquote) so we don't have to depend on
non-public interfaces.
"""
Expand All @@ -1094,30 +1099,7 @@ def _unquote_cookie(s: str) -> str:
# \012 --> \n
# \" --> "
#
i = 0
n = len(s)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(s, i)
q_match = _QuotePatt.search(s, i)
if not o_match and not q_match: # Neither matched
res.append(s[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(s[i:k])
res.append(s[k + 1])
i = k + 2
else: # OctalPatt matched
res.append(s[i:j])
res.append(chr(int(s[j + 1 : j + 4], 8)))
i = j + 4
return _nulljoin(res)
return _unquote_sub(_unquote_replace, s)


def parse_cookie(cookie: str) -> Dict[str, str]:
Expand Down
46 changes: 46 additions & 0 deletions tornado/test/httputil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,3 +560,49 @@ def test_invalid_cookies(self):
self.assertEqual(
parse_cookie(" = b ; ; = ; c = ; "), {"": "b", "c": ""}
)

def test_unquote(self):
# Copied from
# https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L62
cases = [
(r'a="b=\""', 'b="'),
(r'a="b=\\"', "b=\\"),
(r'a="b=\="', "b=="),
(r'a="b=\n"', "b=n"),
(r'a="b=\042"', 'b="'),
(r'a="b=\134"', "b=\\"),
(r'a="b=\377"', "b=\xff"),
(r'a="b=\400"', "b=400"),
(r'a="b=\42"', "b=42"),
(r'a="b=\\042"', "b=\\042"),
(r'a="b=\\134"', "b=\\134"),
(r'a="b=\\\""', 'b=\\"'),
(r'a="b=\\\042"', 'b=\\"'),
(r'a="b=\134\""', 'b=\\"'),
(r'a="b=\134\042"', 'b=\\"'),
]
for encoded, decoded in cases:
with self.subTest(encoded):
c = parse_cookie(encoded)
self.assertEqual(c["a"], decoded)

def test_unquote_large(self):
# Adapted from
# https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L87
# Modified from that test because we handle semicolons differently from the stdlib.
#
# This is a performance regression test: prior to improvements in Tornado 6.4.2, this test
# would take over a minute with n= 100k. Now it runs in tens of milliseconds.
n = 100000
for encoded in r"\\", r"\134":
with self.subTest(encoded):
start = time.time()
data = 'a="b=' + encoded * n + '"'
value = parse_cookie(data)["a"]
end = time.time()
self.assertEqual(value[:3], "b=\\")
self.assertEqual(value[-3:], "\\\\\\")
self.assertEqual(len(value), n + 2)

# Very loose performance check to avoid false positives
self.assertLess(end - start, 1, "Test took too long")