diff --git a/fence/blueprints/data/blueprint.py b/fence/blueprints/data/blueprint.py index 04ea3798f..5dc1055c8 100755 --- a/fence/blueprints/data/blueprint.py +++ b/fence/blueprints/data/blueprint.py @@ -9,6 +9,7 @@ BlankIndex, IndexedFile, get_signed_url_for_file, + verify_data_upload_bucket_configuration, ) from fence.config import config from fence.errors import Forbidden, InternalError, UserError, Unauthorized @@ -177,15 +178,7 @@ def upload_data_file(): protocol = params["protocol"] if "protocol" in params else None bucket = params.get("bucket") if bucket: - s3_buckets = get_value( - flask.current_app.config, - "ALLOWED_DATA_UPLOAD_BUCKETS", - InternalError("ALLOWED_DATA_UPLOAD_BUCKETS not configured"), - ) - if bucket not in s3_buckets: - logger.debug(f"Bucket '{bucket}' not in ALLOWED_DATA_UPLOAD_BUCKETS config") - raise Forbidden(f"Uploading to bucket '{bucket}' is not allowed") - + verify_data_upload_bucket_configuration(bucket) response = { "guid": blank_index.guid, "url": blank_index.make_signed_url( @@ -224,10 +217,16 @@ def init_multipart_upload(): default=default_expires_in, ) + bucket = params.get("bucket") + if bucket: + verify_data_upload_bucket_configuration(bucket) + response = { "guid": blank_index.guid, "uploadId": BlankIndex.init_multipart_upload( - blank_index.guid + "/" + params["file_name"], expires_in=expires_in + blank_index.guid + "/" + params["file_name"], + expires_in=expires_in, + bucket=bucket, ), } return flask.jsonify(response), 201 @@ -256,12 +255,17 @@ def generate_multipart_upload_presigned_url(): default=default_expires_in, ) + bucket = params.get("bucket") + if bucket: + verify_data_upload_bucket_configuration(bucket) + response = { "presigned_url": BlankIndex.generate_aws_presigned_url_for_part( params["key"], params["uploadId"], params["partNumber"], expires_in=expires_in, + bucket=bucket, ) } return flask.jsonify(response), 200 @@ -284,6 +288,7 @@ def complete_multipart_upload(): raise UserError("missing required 
arguments: {}".format(list(missing))) default_expires_in = flask.current_app.config.get("MAX_PRESIGNED_URL_TTL", 3600) + bucket = params.get("bucket") expires_in = get_valid_expiration( params.get("expires_in"), max_limit=default_expires_in, @@ -292,7 +297,11 @@ def complete_multipart_upload(): try: BlankIndex.complete_multipart_upload( - params["key"], params["uploadId"], params["parts"], expires_in=expires_in + params["key"], + params["uploadId"], + params["parts"], + expires_in=expires_in, + bucket=bucket, ), except InternalError as e: return flask.jsonify({"message": e.message}), e.code @@ -311,14 +320,7 @@ def upload_file(file_id): bucket = flask.request.args.get("bucket") if bucket: - s3_buckets = get_value( - flask.current_app.config, - "ALLOWED_DATA_UPLOAD_BUCKETS", - InternalError("ALLOWED_DATA_UPLOAD_BUCKETS not configured"), - ) - if bucket not in s3_buckets: - logger.debug(f"Bucket '{bucket}' not in ALLOWED_DATA_UPLOAD_BUCKETS config") - raise Forbidden(f"Uploading to bucket '{bucket}' is not allowed") + verify_data_upload_bucket_configuration(bucket) result = get_signed_url_for_file( "upload", file_id, file_name=file_name, bucket=bucket diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index faae1df70..f7b9488f6 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -333,14 +333,9 @@ def make_signed_url(self, file_name, protocol=None, expires_in=None, bucket=None ) else: if not bucket: - try: - bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"] - except KeyError: - raise InternalError( - "fence not configured with data upload bucket; can't create signed URL" - ) + bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"] - self.logger.debug("Attemping to upload to bucket '{}'".format(bucket)) + self.logger.debug("Attempting to upload to bucket '{}'".format(bucket)) s3_url = "s3://{}/{}/{}".format(bucket, self.guid, file_name) url = S3IndexedFileLocation(s3_url).get_signed_url("upload", 
expires_in) @@ -353,7 +348,7 @@ def make_signed_url(self, file_name, protocol=None, expires_in=None, bucket=None return url @staticmethod - def init_multipart_upload(key, expires_in=None): + def init_multipart_upload(key, expires_in=None, bucket=None): """ Initilize multipart upload given key @@ -363,17 +358,13 @@ def init_multipart_upload(key, expires_in=None): Returns: uploadId(str) """ - try: + if not bucket: bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"] - except KeyError: - raise InternalError( - "fence not configured with data upload bucket; can't create signed URL" - ) s3_url = "s3://{}/{}".format(bucket, key) return S3IndexedFileLocation(s3_url).init_multipart_upload(expires_in) @staticmethod - def complete_multipart_upload(key, uploadId, parts, expires_in=None): + def complete_multipart_upload(key, uploadId, parts, expires_in=None, bucket=None): """ Complete multipart upload @@ -386,19 +377,19 @@ def complete_multipart_upload(key, uploadId, parts, expires_in=None): Returns: None if success otherwise an exception """ - try: + if bucket: + verify_data_upload_bucket_configuration(bucket) + else: bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"] - except KeyError: - raise InternalError( - "fence not configured with data upload bucket; can't create signed URL" - ) s3_url = "s3://{}/{}".format(bucket, key) S3IndexedFileLocation(s3_url).complete_multipart_upload( uploadId, parts, expires_in ) @staticmethod - def generate_aws_presigned_url_for_part(key, uploadId, partNumber, expires_in): + def generate_aws_presigned_url_for_part( + key, uploadId, partNumber, expires_in, bucket=None + ): """ Generate presigned url for each part @@ -410,12 +401,10 @@ def generate_aws_presigned_url_for_part(key, uploadId, partNumber, expires_in): Returns: presigned_url(str) """ - try: + if bucket: + verify_data_upload_bucket_configuration(bucket) + else: bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"] - except KeyError: - raise InternalError( - "fence not 
configured with data upload bucket; can't create signed URL" - ) s3_url = "s3://{}/{}".format(bucket, key) return S3IndexedFileLocation(s3_url).generate_presigned_url_for_part_upload( uploadId, partNumber, expires_in @@ -1089,7 +1078,7 @@ def init_multipart_upload(self, expires_in): self.bucket_name(), aws_creds, expires_in ) - return multipart_upload.initilize_multipart_upload( + return multipart_upload.initialize_multipart_upload( self.parsed_url.netloc, self.parsed_url.path.strip("/"), credentials ) @@ -1660,3 +1649,11 @@ def filter_auth_ids(action, list_auth_ids): if checked_permission in values: authorized_dbgaps.append(key) return authorized_dbgaps + + +def verify_data_upload_bucket_configuration(bucket): + s3_buckets = flask.current_app.config["ALLOWED_DATA_UPLOAD_BUCKETS"] + if bucket not in s3_buckets: + logger.error(f"Bucket '{bucket}' not in ALLOWED_DATA_UPLOAD_BUCKETS config") + logger.debug(f"Buckets configured in ALLOWED_DATA_UPLOAD_BUCKETS {s3_buckets}") + raise Forbidden(f"Uploading to bucket '{bucket}' is not allowed") diff --git a/fence/blueprints/data/multipart_upload.py b/fence/blueprints/data/multipart_upload.py index c526ce81b..96c3d6227 100644 --- a/fence/blueprints/data/multipart_upload.py +++ b/fence/blueprints/data/multipart_upload.py @@ -3,7 +3,9 @@ from retry.api import retry_call from cdispyutils.hmac4 import generate_aws_presigned_url +from cdispyutils.config import get_value from cdislogging import get_logger +from fence.config import config from fence.errors import InternalError MAX_TRIES = 5 @@ -11,7 +13,7 @@ logger = get_logger(__name__) -def initilize_multipart_upload(bucket, key, credentials): +def initialize_multipart_upload(bucket_name, key, credentials): """ Initialize multipart upload @@ -23,17 +25,30 @@ Returns: UploadId(str): uploadId """ + s3_buckets = get_value( + config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured") + ) + bucket = 
s3_buckets.get(bucket_name) + + url = "" + if bucket.get("endpoint_url"): + url = bucket["endpoint_url"] + session = boto3.Session( aws_access_key_id=credentials["aws_access_key_id"], aws_secret_access_key=credentials["aws_secret_access_key"], aws_session_token=credentials.get("aws_session_token"), ) - s3client = session.client("s3") + s3client = None + if url: + s3client = session.client("s3", endpoint_url=url) + else: + s3client = session.client("s3") try: multipart_upload = retry_call( s3client.create_multipart_upload, - fkwargs={"Bucket": bucket, "Key": key}, + fkwargs={"Bucket": bucket_name, "Key": key}, tries=MAX_TRIES, jitter=10, ) @@ -48,7 +63,7 @@ def initilize_multipart_upload(bucket, key, credentials): return multipart_upload.get("UploadId") -def complete_multipart_upload(bucket, key, credentials, uploadId, parts): +def complete_multipart_upload(bucket_name, key, credentials, uploadId, parts): """ Complete multipart upload. Raise exception if something wrong happens; otherwise success @@ -64,18 +79,31 @@ def complete_multipart_upload(bucket, key, credentials, uploadId, parts): Return: None """ + s3_buckets = get_value( + config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured") + ) + bucket = s3_buckets.get(bucket_name) + + url = "" + if bucket.get("endpoint_url"): + url = bucket["endpoint_url"] + session = boto3.Session( aws_access_key_id=credentials["aws_access_key_id"], aws_secret_access_key=credentials["aws_secret_access_key"], aws_session_token=credentials.get("aws_session_token"), ) - s3client = session.client("s3") + s3client = None + if url: + s3client = session.client("s3", endpoint_url=url) + else: + s3client = session.client("s3") try: retry_call( s3client.complete_multipart_upload, fkwargs={ - "Bucket": bucket, + "Bucket": bucket_name, "Key": key, "MultipartUpload": {"Parts": parts}, "UploadId": uploadId, @@ -95,7 +123,7 @@ def complete_multipart_upload(bucket, key, credentials, uploadId, parts): def 
generate_presigned_url_for_uploading_part( - bucket, key, credentials, uploadId, partNumber, region, expires + bucket_name, key, credentials, uploadId, partNumber, region, expires ): """ Generate presigned url for uploading object part given uploadId and part number @@ -113,13 +141,24 @@ def generate_presigned_url_for_uploading_part( presigned_url(str) """ - url = "https://{}.s3.amazonaws.com/{}".format(bucket, key) + s3_buckets = get_value( + config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured") + ) + bucket = s3_buckets.get(bucket_name) + + if bucket.get("endpoint_url"): + url = bucket["endpoint_url"].strip("/") + "/{}/{}".format( + bucket_name, key.strip("/") + ) + else: + url = "https://{}.s3.amazonaws.com/{}".format(bucket_name, key) additional_signed_qs = {"partNumber": str(partNumber), "uploadId": uploadId} try: - return generate_aws_presigned_url( + presigned_url = generate_aws_presigned_url( url, "PUT", credentials, "s3", region, expires, additional_signed_qs ) + return presigned_url except Exception as e: raise InternalError( "Can not generate presigned url for part number {} of key {}. Detail {}".format( diff --git a/fence/config.py b/fence/config.py index 21bcf42cb..d981bfd38 100644 --- a/fence/config.py +++ b/fence/config.py @@ -46,6 +46,7 @@ def post_process(self): "CIRRUS_CFG", "WHITE_LISTED_GOOGLE_PARENT_ORGS", "CLIENT_CREDENTIALS_ON_DOWNLOAD_ENABLED", + "DATA_UPLOAD_BUCKET", ] for default in defaults: self.force_default_if_none(default, default_cfg=default_config) diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index c54bce4ed..70d1f8e1a 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -1717,10 +1717,15 @@ components: type: string description: requested authorization resources to be set on the resulting indexed record. 
You must have proper authorization to set this + bucket: + type: string + required: false + description: bucket to upload to example: file_name: "my_file.bam" expires_in: 1200 authz: ["/programs/A"] + bucket: "bucket-1" RequestMultipartUpload: type: object required: @@ -1738,6 +1743,10 @@ components: expires_in: type: integer description: optional integer specifying the presigned URL lifetime + bucket: + type: string + required: false + description: bucket to upload to CompleteMultipartUpload: type: object required: @@ -1767,6 +1776,10 @@ expires_in: type: integer description: optional integer specifying the presigned URL lifetime + bucket: + type: string + required: false + description: bucket to upload to CredentialsSource: type: object required: diff --git a/tests/data/test_blank_index.py b/tests/data/test_blank_index.py index ce560b075..7586d85be 100755 --- a/tests/data/test_blank_index.py +++ b/tests/data/test_blank_index.py @@ -348,7 +348,7 @@ def test_init_multipart_upload_missing_configuration_key(app, indexd_client): uploader = MagicMock() current_app = flask.current_app expected_value = copy.deepcopy(current_app.config) - del expected_value["DATA_UPLOAD_BUCKET"] + expected_value["DATA_UPLOAD_BUCKET"] = "" with patch.object(current_app, "config", expected_value): assert current_app.config == expected_value @@ -395,7 +395,7 @@ def test_complete_multipart_upload_missing_key(app, indexd_client): uploader = MagicMock() current_app = flask.current_app expected_value = copy.deepcopy(current_app.config) - del expected_value["DATA_UPLOAD_BUCKET"] + expected_value["DATA_UPLOAD_BUCKET"] = "" with patch.object(current_app, "config", expected_value): assert current_app.config == expected_value @@ -444,7 +444,7 @@ def test_generate_aws_presigned_url_for_part_missing_key(app, indexd_client): uploader = MagicMock() current_app = flask.current_app expected_value = copy.deepcopy(current_app.config) - del expected_value["DATA_UPLOAD_BUCKET"] + 
expected_value["DATA_UPLOAD_BUCKET"] = "" with patch.object(current_app, "config", expected_value): assert current_app.config == expected_value @@ -492,7 +492,7 @@ def test_make_signed_url_missing_configuration_key(app, indexd_client): current_app = flask.current_app expected_value = copy.deepcopy(current_app.config) del expected_value["AZ_BLOB_CONTAINER_URL"] - del expected_value["DATA_UPLOAD_BUCKET"] + expected_value["DATA_UPLOAD_BUCKET"] = "" indexed_file_location = indexd_client["indexed_file_location"] with patch.object(current_app, "config", expected_value): @@ -502,10 +502,8 @@ def test_make_signed_url_missing_configuration_key(app, indexd_client): with patch( "fence.blueprints.data.indexd.AzureBlobStorageIndexedFileLocation.get_signed_url" ): - with patch( - "fence.blueprints.data.indexd.S3IndexedFileLocation.get_signed_url" - ): - with pytest.raises(InternalError): - signed_url = blank_index.make_signed_url( - file_name="some file name", protocol=indexed_file_location - ) + + with pytest.raises(InternalError): + signed_url = blank_index.make_signed_url( + file_name="some file name", protocol=indexed_file_location + ) diff --git a/tests/data/test_data.py b/tests/data/test_data.py index 626e57820..6d6186bec 100755 --- a/tests/data/test_data.py +++ b/tests/data/test_data.py @@ -184,7 +184,7 @@ def test_indexd_upload_file_key_error( current_app = fence.blueprints.data.indexd.flask.current_app expected_value = copy.deepcopy(current_app.config) - del expected_value["DATA_UPLOAD_BUCKET"] + expected_value["DATA_UPLOAD_BUCKET"] = "" del expected_value["AZ_BLOB_CONTAINER_URL"] with patch.object(current_app, "config", expected_value): @@ -276,7 +276,7 @@ def test_indexd_upload_file_filename_key_error( current_app = fence.blueprints.data.indexd.flask.current_app expected_value = copy.deepcopy(current_app.config) - del expected_value["DATA_UPLOAD_BUCKET"] + expected_value["DATA_UPLOAD_BUCKET"] = "" del expected_value["AZ_BLOB_CONTAINER_URL"] with 
patch.object(current_app, "config", expected_value): @@ -1765,6 +1765,176 @@ def json(self): assert response.status_code == 200, response +def test_initialize_multipart_upload_with_bucket_param( + app, client, auth_client, encoded_creds_jwt, user_client +): + """ + Test /data/multipart/init containing bucket parameter + """ + + class MockResponse(object): + def __init__(self, data, status_code=200): + self.data = data + self.status_code = status_code + + def json(self): + return self.data + + data_requests_mocker = mock.patch( + "fence.blueprints.data.indexd.requests", new_callable=mock.Mock + ) + arborist_requests_mocker = mock.patch( + "gen3authz.client.arborist.client.httpx.Client.request", new_callable=mock.Mock + ) + + fence.blueprints.data.indexd.BlankIndex.init_multipart_upload = MagicMock() + with data_requests_mocker as data_requests, arborist_requests_mocker as arborist_requests: + data_requests.post.return_value = MockResponse( + { + "did": str(uuid.uuid4()), + "rev": str(uuid.uuid4())[:8], + "baseid": str(uuid.uuid4()), + } + ) + data_requests.post.return_value.status_code = 200 + arborist_requests.return_value = MockResponse({"auth": True}) + arborist_requests.return_value.status_code = 200 + fence.blueprints.data.indexd.BlankIndex.init_multipart_upload.return_value = ( + "test_uploadId" + ) + headers = { + "Authorization": "Bearer " + encoded_creds_jwt.jwt, + "Content-Type": "application/json", + } + file_name = "asdf" + data = json.dumps({"file_name": file_name, "bucket": "bucket3"}) + response = client.post("/data/multipart/init", headers=headers, data=data) + indexd_url = app.config.get("INDEXD") or app.config.get("BASE_URL") + "/index" + endpoint = indexd_url + "/index/blank/" + indexd_auth = (config["INDEXD_USERNAME"], config["INDEXD_PASSWORD"]) + data_requests.post.assert_called_once_with( + endpoint, + auth=indexd_auth, + json={"file_name": file_name, "uploader": user_client.username}, + headers={}, + ) + assert response.status_code == 201, 
response + assert "guid" in response.json + assert "uploadId" in response.json + + +def test_multipart_upload_presigned_url_with_bucket_param( + app, client, auth_client, encoded_creds_jwt, user_client +): + """ + Test /data/multipart/upload containing bucket parameter + """ + + class MockResponse(object): + def __init__(self, data, status_code=200): + self.data = data + self.status_code = status_code + + def json(self): + return self.data + + data_requests_mocker = mock.patch( + "fence.blueprints.data.indexd.requests", new_callable=mock.Mock + ) + arborist_requests_mocker = mock.patch( + "gen3authz.client.arborist.client.httpx.Client.request", new_callable=mock.Mock + ) + + fence.blueprints.data.indexd.BlankIndex.generate_aws_presigned_url_for_part = ( + MagicMock() + ) + with data_requests_mocker as data_requests, arborist_requests_mocker as arborist_requests: + data_requests.post.return_value = MockResponse( + { + "did": str(uuid.uuid4()), + "rev": str(uuid.uuid4())[:8], + "baseid": str(uuid.uuid4()), + } + ) + data_requests.post.return_value.status_code = 200 + arborist_requests.return_value = MockResponse({"auth": True}) + arborist_requests.return_value.status_code = 200 + fence.blueprints.data.indexd.BlankIndex.generate_aws_presigned_url_for_part.return_value = ( + "test_presigned" + ) + headers = { + "Authorization": "Bearer " + encoded_creds_jwt.jwt, + "Content-Type": "application/json", + } + key = "guid/asdf" + uploadid = "uploadid" + + data = json.dumps( + {"key": key, "uploadId": uploadid, "partNumber": 1, "bucket": "bucket3"} + ) + response = client.post("/data/multipart/upload", headers=headers, data=data) + + assert response.status_code == 200, response + assert "presigned_url" in response.json + + +def test_multipart_complete_upload_with_bucket_param( + app, client, auth_client, encoded_creds_jwt, user_client +): + """ + Test /data/multipart/complete containing bucket parameter + """ + + class MockResponse(object): + def __init__(self, data, 
status_code=200): + self.data = data + self.status_code = status_code + + def json(self): + return self.data + + data_requests_mocker = mock.patch( + "fence.blueprints.data.indexd.requests", new_callable=mock.Mock + ) + arborist_requests_mocker = mock.patch( + "gen3authz.client.arborist.client.httpx.Client.request", new_callable=mock.Mock + ) + + fence.blueprints.data.indexd.BlankIndex.complete_multipart_upload = MagicMock() + with data_requests_mocker as data_requests, arborist_requests_mocker as arborist_requests: + data_requests.post.return_value = MockResponse( + { + "did": str(uuid.uuid4()), + "rev": str(uuid.uuid4())[:8], + "baseid": str(uuid.uuid4()), + } + ) + data_requests.post.return_value.status_code = 200 + arborist_requests.return_value = MockResponse({"auth": True}) + arborist_requests.return_value.status_code = 200 + fence.blueprints.data.indexd.BlankIndex.generate_aws_presigned_url_for_part.return_value = ( + "test_presigned" + ) + headers = { + "Authorization": "Bearer " + encoded_creds_jwt.jwt, + "Content-Type": "application/json", + } + key = "guid/asdf" + uploadid = "uploadid" + + data = json.dumps( + { + "key": key, + "uploadId": uploadid, + "bucket": "bucket3", + "parts": [{"partNumber": 1, "Etag": "test_tag"}], + } + ) + response = client.post("/data/multipart/complete", headers=headers, data=data) + + assert response.status_code == 200, response + + def test_delete_files(app, client, auth_client, encoded_creds_jwt, user_client): fence.auth.config["MOCK_AUTH"] = True did = str(uuid.uuid4()) diff --git a/tests/test-fence-config.yaml b/tests/test-fence-config.yaml index 924766336..086c4f6ac 100755 --- a/tests/test-fence-config.yaml +++ b/tests/test-fence-config.yaml @@ -490,7 +490,7 @@ GS_BUCKETS: cred: 'CRED2' region: 'us-east-1' -ALLOWED_DATA_UPLOAD_BUCKETS: ['bucket3'] +ALLOWED_DATA_UPLOAD_BUCKETS: ['bucket3', 'bucket1'] DATA_UPLOAD_BUCKET: 'bucket1'