Skip to content

Commit

Permalink
Add CACHE_MIN_FRESH constant and update caching behavior in API calls
Browse files Browse the repository at this point in the history
  • Loading branch information
TheByronHimes committed Jan 7, 2025
1 parent 44717b5 commit 2f28afa
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 54 deletions.
1 change: 1 addition & 0 deletions src/ghga_connector/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
MAX_PART_NUMBER = 10000
MAX_RETRIES = 5
MAX_WAIT_TIME = 60 * 60
CACHE_MIN_FRESH = 3
9 changes: 6 additions & 3 deletions src/ghga_connector/core/downloading/api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import httpx

from ghga_connector.constants import TIMEOUT_LONG
from ghga_connector.constants import CACHE_MIN_FRESH, TIMEOUT_LONG
from ghga_connector.core import WorkPackageAccessor, exceptions

from .structs import (
Expand All @@ -34,7 +34,10 @@ async def _get_authorization(
) -> httpx.Headers:
"""
Fetch work order token using accessor and prepare DCS endpoint URL and headers for a
given endpoint identified by the `url` passed
given endpoint identified by the `url` passed.
Cached responses are reused only while they will remain fresh for at least another
`CACHE_MIN_FRESH` seconds; otherwise a new request is made.
"""
# fetch a work order token
decrypted_token = await work_package_accessor.get_work_order_token(file_id=file_id)
Expand All @@ -44,7 +47,7 @@ async def _get_authorization(
"Accept": "application/json",
"Authorization": f"Bearer {decrypted_token}",
"Content-Type": "application/json",
"Cache-Control": "min-fresh=3", # make configurable?
"Cache-Control": f"min-fresh={CACHE_MIN_FRESH}",
}
)

Expand Down
9 changes: 8 additions & 1 deletion src/ghga_connector/core/work_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from ghga_service_commons.utils.crypt import decrypt
from tenacity import RetryError

from ghga_connector.constants import CACHE_MIN_FRESH

from . import exceptions, retry_handler


Expand Down Expand Up @@ -94,7 +96,12 @@ async def get_work_order_token(self, *, file_id: str) -> str:
url = f"{self.api_url}/work-packages/{self.package_id}/files/{file_id}/work-order-tokens"

# send authorization header as bearer token
headers = httpx.Headers({"Authorization": f"Bearer {self.access_token}"})
headers = httpx.Headers(
{
"Authorization": f"Bearer {self.access_token}",
"Cache-Control": f"min-fresh={CACHE_MIN_FRESH}",
}
)
response = await self._call_url(fn=self.client.post, headers=headers, url=url)

status_code = response.status_code
Expand Down
10 changes: 6 additions & 4 deletions tests/fixtures/mock_api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ async def health():


@mock_external_app.get("/objects/{file_id}")
async def drs3_objects(file_id: str, request: Request, url_expires_after: UrlLifespan):
async def drs3_objects(file_id: str, request: Request, expires_after: UrlLifespan):
"""Mock for the drs3 /objects/{file_id} call.
The `expires_after` parameter is an app dependency that is overridden by tests
Expand Down Expand Up @@ -207,7 +207,7 @@ async def drs3_objects(file_id: str, request: Request, url_expires_after: UrlLif
await update_presigned_url_placeholder()
return Response(
status_code=200,
headers=create_caching_headers(expires_after=url_expires_after),
headers=create_caching_headers(expires_after=expires_after),
content=DrsObjectServe(
file_id=file_id,
self_uri=f"drs://localhost:8080//{file_id}",
Expand Down Expand Up @@ -410,10 +410,12 @@ async def ulc_patch_uploads(upload_id: str, request: Request):
async def create_work_order_token(package_id: str, file_id: str):
"""Mock Work Order Token endpoint.
Cached response will be valid for 3 seconds for testing purposes.
Cached response will be valid for 5 seconds for testing purposes.
Since client requests (should) send a `min-fresh` cache-control value of 3 seconds,
a cached response is reused only during its first 2 seconds; after that a new
request is made.
"""
# has to be at least 48 chars long
headers = create_caching_headers(expires_after=3)
headers = create_caching_headers(expires_after=5)
return JSONResponse(
status_code=201,
content=base64.b64encode(b"1234567890" * 5).decode(),
Expand Down
9 changes: 1 addition & 8 deletions tests/integration/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def set_presigned_url_update_endpoint(
bucket_id: str,
object_id: str,
expires_after: int,
validity_buffer: int = 3,
):
"""Temporarily assign the S3 download URL update endpoint in the mock app.
Expand All @@ -137,14 +136,8 @@ async def update_presigned_url_actual():
update_presigned_url_actual,
)

# Pretend we're in the DCS:
# For the response we'll send to the Connector, make the caching header expire a
# few seconds prior to the hard S3 expiration so we proactively retrieve a fresh
# download URL without dealing with expired URLs.
cache_lifespan = max(2, expires_after - validity_buffer)

# Override the app dependency so it uses the new cache lifespan
mock_external_app.dependency_overrides[url_expires_after] = lambda: cache_lifespan
mock_external_app.dependency_overrides[url_expires_after] = lambda: expires_after


@pytest.mark.parametrize(
Expand Down
39 changes: 1 addition & 38 deletions tests/unit/test_api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,43 +84,6 @@ async def post(self, *args, **kwargs) -> httpx.Response:
return await self._do_request("post", *args, **kwargs)


async def test_get_work_package_caching(monkeypatch, httpx_mock: HTTPXMock):
"""Test the caching of call to get work package information."""
monkeypatch.setattr("ghga_connector.core.client.httpx.AsyncClient", RecordingClient)
async with async_client() as client:
assert isinstance(client, RecordingClient)
accessor = WorkPackageAccessor(
api_url=API_URL,
client=client,
dcs_api_url="",
my_private_key=b"test-private",
my_public_key=b"test-public",
access_token="",
package_id="wp_1",
)
# add mocked response
add_httpx_response = partial(
httpx_mock.add_response,
status_code=200,
json={"files": {"file-id-1": ".json"}},
headers=create_caching_headers(1),
)
add_httpx_response()
await accessor.get_package_files()
assert client.calls
client.assert_last_call_not_from_cache()

# Make same call and verify that the call came from the cache instead
await accessor.get_package_files()
client.assert_last_call_from_cache()

# Wait 1 second to make sure the cache expires, then call again
add_httpx_response()
await asyncio.sleep(1)
await accessor.get_package_files()
client.assert_last_call_not_from_cache()


async def test_get_work_order_token_caching(monkeypatch, httpx_mock: HTTPXMock):
"""Test the caching of call to the WPS to get a work order token.
Expand Down Expand Up @@ -149,7 +112,7 @@ async def test_get_work_order_token_caching(monkeypatch, httpx_mock: HTTPXMock):
httpx_mock.add_response,
status_code=201,
json=base64.b64encode(b"1234567890" * 5).decode(),
headers=create_caching_headers(1),
headers=create_caching_headers(3),
)
add_httpx_response()
await accessor.get_work_order_token(file_id=file_id)
Expand Down

0 comments on commit 2f28afa

Please sign in to comment.