Skip to content

Commit

Permalink
Return auth specific errors when downloading media (#73)
Browse files Browse the repository at this point in the history
Fixes #72 

Also updates the docs: the error codes specific to authenticated media
are moved up into the `Error codes` section, alongside the rest of the
possible content scanner errors.
  • Loading branch information
devonh authored Dec 16, 2024
1 parent 553517c commit 5690feb
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 5 deletions.
8 changes: 3 additions & 5 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ status code of the response for each scenario:
|-------------|-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 400 | `MCS_MALFORMED_JSON` | The request body contains malformed JSON. |
| 400 | `MCS_MEDIA_FAILED_TO_DECRYPT` | The server failed to decrypt the encrypted media downloaded from the media repo. |
| 401 | `M_MISSING_TOKEN` | The request is missing a required access token for authentication. |
| 401 | `M_UNKNOWN_TOKEN` | The access token provided for authentication is not valid. |
| 404 | `M_NOT_FOUND` | The `Authorization` header was missing when requesting authenticated media. |
| 404 | `M_NOT_FOUND` | No route could be found at the given path. |
| 404 | `M_NOT_FOUND` | The requested media was not present in the media repo. |
| 403 | `MCS_MEDIA_NOT_CLEAN` | The server scanned the downloaded media but the antivirus script returned a non-zero exit code. |
Expand Down Expand Up @@ -199,8 +202,3 @@ Example authorization header:
```
Authorization: Bearer <access_token>
```

If a request is made for authenticated media and the access token is invalid, the content scanner
will respond with HTTP status 502, errcode `MCS_MEDIA_REQUEST_FAILED`.
If a request is made for authenticated media and the `Authorization` header is missing, the content
scanner will respond with HTTP status 404, errcode `M_NOT_FOUND`.
18 changes: 18 additions & 0 deletions src/matrix_content_scanner/scanner/file_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,24 @@ async def _get_file_content(
except (json.decoder.JSONDecodeError, KeyError):
pass

if code == 401:
try:
err = json.loads(body)
if err["errcode"] == ErrCode.MISSING_TOKEN:
raise ContentScannerRestError(
HTTPStatus.UNAUTHORIZED,
ErrCode.MISSING_TOKEN,
"Access token missing from request",
)
if err["errcode"] == ErrCode.UNKNOWN_TOKEN:
raise ContentScannerRestError(
HTTPStatus.UNAUTHORIZED,
ErrCode.UNKNOWN_TOKEN,
"Invalid access token passed",
)
except (json.decoder.JSONDecodeError, KeyError):
pass

if code == 404:
raise _PathNotFoundException

Expand Down
9 changes: 9 additions & 0 deletions src/matrix_content_scanner/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ class ErrCode(str, Enum):
# - No route was found with the path and method provided in the request.
# - The homeserver does not have the requested piece of media.
NOT_FOUND = "M_NOT_FOUND"
# The access token is missing from the request.
MISSING_TOKEN = "M_MISSING_TOKEN"
# The provided access token is invalid.
# One of the following:
# - the access token was never valid.
# - the access token has been logged out.
# - the access token has been soft logged out.
# - [Added in v1.3] the access token needs to be refreshed.
UNKNOWN_TOKEN = "M_UNKNOWN_TOKEN"
# The file failed the scan.
NOT_CLEAN = "MCS_MEDIA_NOT_CLEAN"
# The file could not be retrieved from the homeserver.
Expand Down
48 changes: 48 additions & 0 deletions tests/scanner/test_file_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,54 @@ async def test_download_auth_media(self) -> None:
self.assertTrue(args[0].startswith("http://my-site.com/"))
self.assertIn("/_matrix/client/v1/media/download/" + MEDIA_PATH, args[0])

async def test_download_auth_media_invalid_token(self) -> None:
    """Check that an `M_UNKNOWN_TOKEN` 401 from the media download is surfaced.

    The stubbed response is a 401 with an `M_UNKNOWN_TOKEN` JSON body; the
    download is expected to raise a `ContentScannerRestError` carrying the same
    HTTP status and error code.
    """
    error_payload = (
        b'{"errcode":"M_UNKNOWN_TOKEN","error":"Invalid access token"}'
    )
    # Stub the response the downloader will see
    # (presumably consumed by the mocked GET — confirm in the test setup).
    self.media_status = 401
    self.media_body = error_payload
    self._set_headers({"content-type": ["application/json"]})

    # The download must fail with a REST error rather than succeed.
    with self.assertRaises(ContentScannerRestError) as cm:
        await self.downloader.download_file(
            MEDIA_PATH, auth_header="Bearer access_token"
        )

    raised = cm.exception
    self.assertEqual(raised.http_status, 401)
    self.assertEqual(raised.reason, "M_UNKNOWN_TOKEN")

    # The request must still have targeted the configured base URL, on the
    # authenticated media download endpoint.
    call_args = self.get_mock.call_args.args
    requested_url = call_args[0]
    self.assertTrue(requested_url.startswith("http://my-site.com/"))
    self.assertIn(
        "/_matrix/client/v1/media/download/" + MEDIA_PATH, requested_url
    )

async def test_download_auth_media_missing_token(self) -> None:
    """Check that an `M_MISSING_TOKEN` 401 from the media download is surfaced.

    The stubbed response is a 401 with an `M_MISSING_TOKEN` JSON body; the
    download is expected to raise a `ContentScannerRestError` carrying the same
    HTTP status and error code.
    """
    error_payload = (
        b'{"errcode":"M_MISSING_TOKEN","error":"Missing access token"}'
    )
    # Stub the response the downloader will see
    # (presumably consumed by the mocked GET — confirm in the test setup).
    self.media_status = 401
    self.media_body = error_payload
    self._set_headers({"content-type": ["application/json"]})

    # The download must fail with a REST error rather than succeed.
    with self.assertRaises(ContentScannerRestError) as cm:
        await self.downloader.download_file(
            MEDIA_PATH, auth_header="Bearer access_token"
        )

    raised = cm.exception
    self.assertEqual(raised.http_status, 401)
    self.assertEqual(raised.reason, "M_MISSING_TOKEN")

    # The request must still have targeted the configured base URL, on the
    # authenticated media download endpoint.
    call_args = self.get_mock.call_args.args
    requested_url = call_args[0]
    self.assertTrue(requested_url.startswith("http://my-site.com/"))
    self.assertIn(
        "/_matrix/client/v1/media/download/" + MEDIA_PATH, requested_url
    )

async def test_no_base_url(self) -> None:
"""Tests that configuring a base homeserver URL means files are downloaded from
that homeserver (rather than the one the files were uploaded to) and .well-known
Expand Down

0 comments on commit 5690feb

Please sign in to comment.