Skip to content

Commit

Permalink
Validate dataset IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
Cito committed Jul 8, 2024
1 parent ab2717e commit e6298e2
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 44 deletions.
11 changes: 9 additions & 2 deletions src/ars/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
"""

from enum import Enum
from typing import Annotated

from ghga_service_commons.utils.utc_dates import UTCDatetime
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, StringConstraints

__all__ = [
"AccessRequest",
Expand All @@ -45,6 +46,12 @@ class AccessRequestStatus(str, Enum):
PENDING = "pending"


# Accession format should be moved to the commons module
Accession = Annotated[
str, StringConstraints(strip_whitespace=True, pattern="^[A-Z]{1,6}[0-9]{3,18}$")
]


class AccessRequestCreationData(BaseDto):
"""All data necessary to create an access request."""

Expand All @@ -54,7 +61,7 @@ class AccessRequestCreationData(BaseDto):
description="ID of the IVA to be used for this request,"
" but this can also be specified later",
)
dataset_id: str = Field(
dataset_id: Accession = Field(
default=..., description="ID of the dataset for which access is requested"
)
email: str = Field(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_access_grants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

USER_ID = "some-user-id"
IVA_ID = "some-iva-id"
DATASET_ID = "some-dataset-id"
DATASET_ID = "DS001"
VALID_FROM = utc_datetime(2020, 1, 1, 0, 0)
VALID_UNTIL = utc_datetime(2020, 12, 31, 23, 59)

Expand Down
42 changes: 30 additions & 12 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

CREATION_DATA = {
"user_id": "[email protected]",
"dataset_id": "some-dataset",
"dataset_id": "DS001",
"iva_id": "some-iva",
"email": "[email protected]",
"request_text": "Can I access some dataset?",
Expand Down Expand Up @@ -140,6 +140,24 @@ async def test_create_access_request_that_is_too_long(
assert response.json()["detail"] == "Access end date is invalid"


async def test_create_access_request_with_invalid_dataset_id(
joint_fixture: JointFixture, auth_headers_doe: dict[str, str]
):
"""Test that an access request must have a valid dataset ID."""
response = await joint_fixture.rest_client.post(
"/access-requests",
json={
**CREATION_DATA,
"dataset_id": "This is not a valid dataset ID!",
},
headers=auth_headers_doe,
)
assert response.status_code == 422
msg = str(response.json()["detail"])
assert "dataset_id" in msg
assert "String should match pattern" in msg


async def test_get_access_requests(
joint_fixture: JointFixture,
auth_headers_doe: dict[str, str],
Expand Down Expand Up @@ -176,7 +194,7 @@ async def test_get_access_requests(
assert request["id"] == access_request_id
assert request["user_id"] == "[email protected]"
assert request["iva_id"] == "some-iva"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "pending"

# get all requests as data steward
Expand All @@ -191,12 +209,12 @@ async def test_get_access_requests(
# last made request comes first
assert request["id"] == another_access_request_id
assert request["user_id"] == "[email protected]"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "pending"
request = requests[1]
assert request["id"] == access_request_id
assert request["user_id"] == "[email protected]"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "pending"


Expand Down Expand Up @@ -245,13 +263,13 @@ async def test_filter_access_requests(
assert len(response.json()) == 0

response = await client.get(
"/access-requests?dataset_id=some-dataset", headers=auth_headers_doe
"/access-requests?dataset_id=DS001", headers=auth_headers_doe
)
assert response.status_code == 200
assert len(response.json()) == 1

response = await client.get(
"/access-requests?dataset_id=another-dataset", headers=auth_headers_doe
"/access-requests?dataset_id=DS002", headers=auth_headers_doe
)
assert response.status_code == 200
assert len(response.json()) == 0
Expand All @@ -271,7 +289,7 @@ async def test_filter_access_requests(
# combined filter
response = await client.get(
"/access-requests?"
"[email protected]&dataset_id=some-dataset&status=pending",
"[email protected]&dataset_id=DS001&status=pending",
headers=auth_headers_doe,
)
assert response.status_code == 200
Expand All @@ -289,7 +307,7 @@ async def test_patch_access_request(
httpx_mock.add_response(
method="POST",
url="http://access/users/[email protected]"
"/ivas/some-iva/datasets/some-dataset",
"/ivas/some-iva/datasets/DS001",
status_code=204,
)

Expand Down Expand Up @@ -347,7 +365,7 @@ async def test_patch_access_request(
assert request["id"] == access_request_id
assert request["user_id"] == "[email protected]"
assert request["iva_id"] == "some-iva"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "allowed"
assert request["status_changed"]
assert request["changed_by"] is None # cannot see internals
Expand All @@ -364,7 +382,7 @@ async def test_patch_access_request(
assert request["id"] == access_request_id
assert request["user_id"] == "[email protected]"
assert request["iva_id"] == "some-iva"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "allowed"
assert request["status_changed"]
assert request["changed_by"] == "[email protected]" # can see internals
Expand All @@ -381,7 +399,7 @@ async def test_patch_access_request_with_another_iva(
httpx_mock.add_response(
method="POST",
url="http://access/users/[email protected]"
"/ivas/another-iva/datasets/some-dataset",
"/ivas/another-iva/datasets/DS001",
status_code=204,
)

Expand Down Expand Up @@ -415,7 +433,7 @@ async def test_patch_access_request_with_another_iva(
assert request["user_id"] == "[email protected]"
# make sure that the IVA has been changed
assert request["iva_id"] == "another-iva"
assert request["dataset_id"] == "some-dataset"
assert request["dataset_id"] == "DS001"
assert request["status"] == "allowed"
assert request["status_changed"]
assert request["changed_by"] is None
Expand Down
4 changes: 2 additions & 2 deletions tests/test_event_pub.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ async def test_access_request_events(status: str):
"""Test that an event is published properly."""
request = AccessRequest(
id="unique_access_request_id",
user_id="user123",
dataset_id="dataset456",
user_id="user-123",
dataset_id="DS456",
email="[email protected]",
request_text="Requesting access for research purposes",
access_starts=now_as_utc(),
Expand Down
50 changes: 23 additions & 27 deletions tests/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
AccessRequest(
id="request-id-1",
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=utc_datetime(2020, 1, 1, 0, 0),
Expand All @@ -91,7 +91,7 @@
AccessRequest(
id="request-id-2",
user_id="[email protected]",
dataset_id="another-dataset",
dataset_id="DS002",
email="[email protected]",
request_text="Can I access another dataset?",
access_starts=utc_datetime(2020, 1, 1, 0, 0),
Expand All @@ -105,7 +105,7 @@
AccessRequest(
id="request-id-3",
user_id="[email protected]",
dataset_id="yet-another-dataset",
dataset_id="DS003",
email="[email protected]",
request_text="Can I access yet another dataset?",
access_starts=utc_datetime(2020, 1, 1, 0, 0),
Expand All @@ -119,7 +119,7 @@
AccessRequest(
id="request-id-4",
user_id="[email protected]",
dataset_id="new-dataset",
dataset_id="DS007",
email="[email protected]",
request_text="Can I access a new dataset?",
access_starts=utc_datetime(2021, 1, 1, 0, 0),
Expand All @@ -133,7 +133,7 @@
AccessRequest(
id="request-id-5",
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access the same dataset as Joe?",
access_starts=utc_datetime(2020, 1, 1, 0, 0),
Expand All @@ -148,7 +148,7 @@
id="request-id-6",
user_id="[email protected]",
iva_id="iva-of-john",
dataset_id="yet-another-dataset",
dataset_id="DS003",
email="[email protected]",
request_text="Can I access yet another dataset using this IVA?",
access_starts=utc_datetime(2021, 6, 1, 0, 0),
Expand Down Expand Up @@ -297,7 +297,7 @@ async def test_can_create_request():
access_ends = access_starts + ONE_YEAR
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -311,7 +311,7 @@ async def test_can_create_request():
assert request.id == "newly-created-id"
assert request.user_id == "[email protected]"
assert request.iva_id is None
assert request.dataset_id == "some-dataset"
assert request.dataset_id == "DS001"
assert request.email == "[email protected]"
assert request.request_text == "Can I access some dataset?"
assert request.access_starts == request.request_created
Expand Down Expand Up @@ -340,7 +340,7 @@ async def test_can_create_request_with_an_iva():
creation_data = AccessRequestCreationData(
user_id="[email protected]",
iva_id="some-iva_id",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -352,7 +352,7 @@ async def test_can_create_request_with_an_iva():
assert request.id == "newly-created-id"
assert request.user_id == "[email protected]"
assert request.iva_id == "some-iva_id"
assert request.dataset_id == "some-dataset"
assert request.dataset_id == "DS001"

assert dao.last_upsert == request

Expand All @@ -363,7 +363,7 @@ async def test_cannot_create_request_for_somebody_else():
access_ends = access_starts + ONE_YEAR
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -380,7 +380,7 @@ async def test_silently_correct_request_that_is_too_early():
access_ends = access_starts + 1.5 * ONE_YEAR
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -406,7 +406,7 @@ async def test_cannot_create_request_too_much_in_advance():
access_ends = access_starts + ONE_YEAR
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -429,7 +429,7 @@ async def test_cannot_create_request_too_short():
access_ends = access_starts + timedelta(days=29)
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand All @@ -452,7 +452,7 @@ async def test_cannot_create_request_too_long():
access_ends = access_starts + 2.5 * ONE_YEAR
creation_data = AccessRequestCreationData(
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
email="[email protected]",
request_text="Can I access some dataset?",
access_starts=access_starts,
Expand Down Expand Up @@ -515,14 +515,12 @@ async def test_data_steward_can_get_requests_of_specific_user():

async def test_data_steward_can_get_requests_for_specific_dataset():
"""Test getting requests for a specific dataset."""
requests = await repository.get(
auth_context=auth_context_doe, dataset_id="another-dataset"
)
requests = await repository.get(auth_context=auth_context_doe, dataset_id="DS002")
assert len(requests) == 1
assert requests == [
request.model_copy(update={"changed_by": None}) # data steward is hidden
for request in ACCESS_REQUESTS
if request.dataset_id == "another-dataset"
if request.dataset_id == "DS002"
]


Expand All @@ -544,15 +542,15 @@ async def test_filtering_using_multiple_criteria():
requests = await repository.get(
auth_context=auth_context_steward,
user_id="[email protected]",
dataset_id="some-dataset",
dataset_id="DS001",
status=AccessRequestStatus.ALLOWED,
)
assert len(requests) == 1
assert requests == [
request
for request in ACCESS_REQUESTS
if request.user_id == "[email protected]"
and request.dataset_id == "some-dataset"
and request.dataset_id == "DS001"
and request.status == AccessRequestStatus.ALLOWED
]

Expand Down Expand Up @@ -591,7 +589,7 @@ async def test_set_status_to_allowed():
assert event_publisher.events == [expected_event]

assert access_grants.last_grant == (
"to [email protected] with some-iva for new-dataset"
"to [email protected] with some-iva for DS007"
" from 2021-01-01 00:00:00+00:00 until 2021-12-31 23:59:00+00:00"
)

Expand Down Expand Up @@ -628,7 +626,7 @@ async def test_set_status_to_allowed_reusing_iva():
assert event_publisher.events == [expected_event]

assert access_grants.last_grant == (
"to [email protected] with iva-of-john for yet-another-dataset"
"to [email protected] with iva-of-john for DS003"
" from 2021-06-01 00:00:00+00:00 until 2021-12-31 23:59:00+00:00"
)

Expand Down Expand Up @@ -667,7 +665,7 @@ async def test_set_status_to_allowed_overriding_iva():
assert event_publisher.events == [expected_event]

assert access_grants.last_grant == (
"to [email protected] with some-other-iva-of-john for yet-another-dataset"
"to [email protected] with some-other-iva-of-john for DS003"
" from 2021-06-01 00:00:00+00:00 until 2021-12-31 23:59:00+00:00"
)

Expand Down Expand Up @@ -709,9 +707,7 @@ async def test_set_status_to_allowed_with_error_when_granting_access():
auth_context=auth_context_steward,
)

assert (
access_grants.last_grant == "to [email protected] for new-dataset failed"
)
assert access_grants.last_grant == "to [email protected] for DS007 failed"

# make sure the status is not changed in this case, and no mails are sent out
changed_request = dao.last_upsert
Expand Down

0 comments on commit e6298e2

Please sign in to comment.