From bb85c26a2b877965c5e0a0cd841b7f676ec2533e Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 23 Dec 2024 04:37:17 -0600 Subject: [PATCH] Fix cache pollution from mutable reference (#3887) - Removes manual overrides of copy behavior and leaves it up to the caller. - E.g. a future use case may require a non-deepcopy. If we override copy they would have to remove the dunder copy, update every implementation which relies copy, before finally creating their own copy implementation. - Deepcopies the flag buffer. - Though we do not cache mutable references yet we may soon and so this foot gun should be removed from possibility. - Removes "copy" test coverage from `test_lru_cache.py`. We're no longer assuming copy usage and leave it up to the caller. - The existing test in `tests/test_scope.py` covers the cache pollution case [originally mentioned here](https://github.com/getsentry/sentry-python/issues/3852). - The mutable cache pollution case is not covered because we do not currently cache mutable objects. In general a generic class should assume as few implementation details as possible. If we leave the existing copy method someone may assume copy semantics and rely on it in a way that is inappropriate. Closes: https://github.com/getsentry/sentry-python/issues/3886 Co-authored-by: Anton Pirker --- sentry_sdk/_lru_cache.py | 9 ------- sentry_sdk/flag_utils.py | 7 ------ sentry_sdk/scope.py | 4 +-- tests/test_lru_cache.py | 53 ---------------------------------------- 4 files changed, 2 insertions(+), 71 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 09eae27df2..cbadd9723b 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -17,15 +17,6 @@ def __init__(self, max_size): self.hits = self.misses = 0 self.full = False - def __copy__(self): - # type: () -> LRUCache - new = LRUCache(max_size=self.max_size) - new.hits = self.hits - new.misses = self.misses - new.full = self.full - new._data = self._data.copy() - return new - def set(self, key, value): # type: (Any, Any) -> None current = self._data.pop(key, _SENTINEL) diff --git a/sentry_sdk/flag_utils.py b/sentry_sdk/flag_utils.py index 2b345a7f0b..cf4800e855 100644 --- a/sentry_sdk/flag_utils.py +++ b/sentry_sdk/flag_utils.py @@ -1,4 +1,3 @@ -from copy import copy from typing import TYPE_CHECKING import sentry_sdk @@ -25,12 +24,6 @@ def clear(self): # type: () -> None self.buffer = LRUCache(self.capacity) - def __copy__(self): - # type: () -> FlagBuffer - buffer = FlagBuffer(capacity=self.capacity) - buffer.buffer = copy(self.buffer) - return buffer - def get(self): # type: () -> list[FlagData] return [{"flag": key, "result": value} for key, value in self.buffer.get_all()] diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index bb45143c48..cf72fabdd1 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -1,7 +1,7 @@ import os import sys import warnings -from copy import copy +from copy import copy, deepcopy from collections import deque from contextlib import contextmanager from enum import Enum @@ -252,7 +252,7 @@ def __copy__(self): rv._last_event_id = self._last_event_id - rv._flags = copy(self._flags) + rv._flags = deepcopy(self._flags) return rv diff --git a/tests/test_lru_cache.py b/tests/test_lru_cache.py index 1a54ed83d3..3e9c0ac964 100644 --- a/tests/test_lru_cache.py +++ b/tests/test_lru_cache.py @@ -1,5 +1,4 @@ import pytest -from copy import copy, deepcopy from sentry_sdk._lru_cache import LRUCache @@ -59,55 +58,3 @@ def test_cache_get_all(): assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] cache.get(1) assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] - - -def test_cache_copy(): - cache = LRUCache(3) - cache.set(0, 0) - cache.set(1, 1) - - copied = copy(cache) - cache.set(2, 2) - cache.set(3, 3) - assert copied.get_all() == [(0, 0), (1, 1)] - assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] - - copied = copy(cache) - cache.get(1) - assert copied.get_all() == [(1, 1), (2, 2), (3, 3)] - assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] - - -def test_cache_deepcopy(): - cache = LRUCache(3) - cache.set(0, 0) - cache.set(1, 1) - - copied = deepcopy(cache) - cache.set(2, 2) - cache.set(3, 3) - assert copied.get_all() == [(0, 0), (1, 1)] - assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] - - copied = deepcopy(cache) - cache.get(1) - assert copied.get_all() == [(1, 1), (2, 2), (3, 3)] - assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] - - -def test_cache_pollution(): - cache1 = LRUCache(max_size=2) - cache1.set(1, True) - cache2 = copy(cache1) - cache2.set(1, False) - assert cache1.get(1) is True - assert cache2.get(1) is False - - -def test_cache_pollution_deepcopy(): - cache1 = LRUCache(max_size=2) - cache1.set(1, True) - cache2 = deepcopy(cache1) - cache2.set(1, False) - assert cache1.get(1) is True - assert cache2.get(1) is False