From 9139d4d0da7b001cd76fd7aaca7da6f0d848d09e Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Thu, 5 Oct 2023 14:49:06 -0500 Subject: [PATCH] Downloads through GA4GH DRS are recorded in audit service (#1117) --- fence/blueprints/data/blueprint.py | 2 -- fence/blueprints/data/indexd.py | 5 ++- fence/blueprints/ga4gh.py | 1 + fence/resources/audit/utils.py | 19 ++++++++-- tests/test_audit_service.py | 56 ++++++++++++++++++++++-------- 5 files changed, 63 insertions(+), 20 deletions(-) diff --git a/fence/blueprints/data/blueprint.py b/fence/blueprints/data/blueprint.py index 9016e1497..04ea3798f 100755 --- a/fence/blueprints/data/blueprint.py +++ b/fence/blueprints/data/blueprint.py @@ -12,7 +12,6 @@ ) from fence.config import config from fence.errors import Forbidden, InternalError, UserError, Unauthorized -from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration @@ -328,7 +327,6 @@ def upload_file(file_id): @blueprint.route("/download/", methods=["GET"]) -@enable_audit_logging def download_file(file_id): """ Get a presigned url to download a file given by file_id. diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 23ed386ef..94d5eb26a 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -20,8 +20,8 @@ AccountSasPermissions, generate_blob_sas, ) -from fence import auth +from fence import auth from fence.auth import ( get_jwt, current_token, @@ -47,7 +47,9 @@ give_service_account_billing_access_if_necessary, ) from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports +from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration_from_request + from . import multipart_upload from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id from ...models import AssumeRoleCacheGCP @@ -66,6 +68,7 @@ ANONYMOUS_USERNAME = "anonymous" +@enable_audit_logging def get_signed_url_for_file( action, file_id, diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py index e73a42465..7b4ef0603 100644 --- a/fence/blueprints/ga4gh.py +++ b/fence/blueprints/ga4gh.py @@ -1,5 +1,6 @@ import flask from flask import request + from fence.errors import UserError from fence.config import config diff --git a/fence/resources/audit/utils.py b/fence/resources/audit/utils.py index a41412781..9331da793 100644 --- a/fence/resources/audit/utils.py +++ b/fence/resources/audit/utils.py @@ -39,6 +39,13 @@ def create_audit_log_for_request(response): in `enable_audit_logging` decorator), record an audit log. The data we need to record the logs are stored in `flask.g.audit_data` before reaching this code. + + TODO The audit service has the ability to record presigned URL "upload" logs but we are not + currently sending those logs. We would need to: + - add the `@enable_audit_logging` decorator to `init_multipart_upload` (single upload requests + are handled by `get_signed_url_for_file` which is already decorated). + - update this function to send the appropriate data when those endpoints are called. + - add upload unit tests to `test_audit_service.py`. """ try: method = flask.request.method @@ -49,11 +56,19 @@ def create_audit_log_for_request(response): # could use `flask.request.url` but we don't want the root URL request_url += f"?{flask.request.query_string.decode('utf-8')}" - if method == "GET" and endpoint.startswith("/data/download/"): + if method == "GET" and ( + endpoint.startswith("/data/download/") + or endpoint.startswith("/ga4gh/drs/v1/objects/") + ): + if endpoint.startswith("/data/download/"): + guid = endpoint[len("/data/download/") :] + else: + guid = endpoint[len("/ga4gh/drs/v1/objects/") :] + guid = guid.split("/access/")[0] flask.current_app.audit_service_client.create_presigned_url_log( status_code=response.status_code, request_url=request_url, - guid=endpoint[len("/data/download/") :], + guid=guid, action="download", **audit_data, ) diff --git a/tests/test_audit_service.py b/tests/test_audit_service.py index c3f8035a2..b1d6568d9 100644 --- a/tests/test_audit_service.py +++ b/tests/test_audit_service.py @@ -107,8 +107,10 @@ def __init__(self, data, status_code=200): @pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True) +@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"]) @pytest.mark.parametrize("protocol", ["gs", None]) def test_presigned_url_log( + endpoint, protocol, client, user_client, @@ -133,9 +135,12 @@ def test_presigned_url_log( monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) guid = "dg.hello/abc" - path = f"/data/download/{guid}" - if protocol: - path += f"?protocol={protocol}" + if endpoint == "download": + path = f"/data/download/{guid}" + if protocol: + path += f"?protocol={protocol}" + else: + path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}" resource_paths = ["/my/resource/path1", "/path2"] indexd_client_with_arborist(resource_paths) headers = { @@ -182,7 +187,9 @@ def test_presigned_url_log( @pytest.mark.parametrize( "indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True ) +@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"]) def test_presigned_url_log_acl( + endpoint, client, user_client, mock_arborist_requests, @@ -206,7 +213,10 @@ def test_presigned_url_log_acl( protocol = "gs" guid = "dg.hello/abc" - path = f"/data/download/{guid}?protocol={protocol}" + if endpoint == "download": + path = f"/data/download/{guid}?protocol={protocol}" + else: + path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}" indexd_client_with_arborist(None) headers = { "Authorization": "Bearer " @@ -244,7 +254,8 @@ def test_presigned_url_log_acl( @pytest.mark.parametrize("public_indexd_client", ["s3_and_gs"], indirect=True) -def test_presigned_url_log_public(client, public_indexd_client, monkeypatch): +@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"]) +def test_presigned_url_log_public(endpoint, client, public_indexd_client, monkeypatch): """ Same as `test_presigned_url_log`, but with an anonymous user downloading public data. @@ -254,8 +265,12 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch): ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) + protocol = "s3" guid = "dg.hello/abc" - path = f"/data/download/{guid}" + if endpoint == "download": + path = f"/data/download/{guid}?protocol={protocol}" + else: + path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}" with audit_service_mocker as audit_service_requests: audit_service_requests.post.return_value = MockResponse( @@ -275,13 +290,15 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch): "guid": guid, "resource_paths": [], "action": "download", - "protocol": "s3", + "protocol": protocol, }, ) @pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True) +@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"]) def test_presigned_url_log_disabled( + endpoint, client, user_client, mock_arborist_requests, @@ -307,7 +324,10 @@ def test_presigned_url_log_disabled( protocol = "gs" guid = "dg.hello/abc" - path = f"/data/download/{guid}" + if endpoint == "download": + path = f"/data/download/{guid}" + else: + path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}" if protocol: path += f"?protocol={protocol}" resource_paths = ["/my/resource/path1", "/path2"] @@ -324,9 +344,6 @@ def test_presigned_url_log_disabled( ) } - # protocol=None should fall back to s3 (first indexed location): - expected_protocol = protocol or "s3" - with audit_service_mocker as audit_service_requests: audit_service_requests.post.return_value = MockResponse( data={}, @@ -339,17 +356,26 @@ def test_presigned_url_log_disabled( @pytest.mark.parametrize("indexd_client", ["s3_and_gs"], indirect=True) -def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monkeypatch): +@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"]) +def test_presigned_url_log_unauthorized( + endpoint, client, indexd_client, db_session, monkeypatch +): """ - If Fence does not return a presigned URL, no audit log should be created. + If Fence does not return a presigned URL, an audit log with the appropriate status + code should be created. """ audit_service_mocker = mock.patch( "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) + protocol = "s3" guid = "dg.hello/abc" - path = f"/data/download/{guid}" + path = f"/data/download/{guid}?protocol={protocol}" + if endpoint == "download": + path = f"/data/download/{guid}?protocol={protocol}" + else: + path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}" with audit_service_mocker as audit_service_requests: audit_service_requests.post.return_value = MockResponse( data={}, @@ -367,7 +393,7 @@ def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monke "guid": guid, "resource_paths": [], "action": "download", - "protocol": "s3", + "protocol": protocol, }, )