Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/s3 delete files #1127

Merged
Merged
5 changes: 3 additions & 2 deletions fence/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,9 @@ def app_config(

# Attach a BotoManager to the app from the configured AWS credentials and
# register the data blueprint under the "/data" URL prefix.
# NOTE(review): this is a rendered diff hunk with indentation stripped. The
# hunk header reports "3 additions & 2 deletions", so the two lines that use
# `value` are presumably the removed (pre-change) lines and the three lines
# that use `creds`/`buckets` their replacement -- confirm against the repo.
# NOTE(review): with indentation lost it is not visible here whether the
# blueprint registration is inside the `if` -- confirm.
def _setup_data_endpoint_and_boto(app):
if "AWS_CREDENTIALS" in config and len(config["AWS_CREDENTIALS"]) > 0:
value = list(config["AWS_CREDENTIALS"].values())[0]
app.boto = BotoManager(value, logger=logger)
# all credential sets, plus the per-bucket settings (empty dict when
# "S3_BUCKETS" is not configured), are handed to BotoManager
creds = config["AWS_CREDENTIALS"]
buckets = config.get("S3_BUCKETS", {})
app.boto = BotoManager(creds, buckets, logger=logger)
app.register_blueprint(fence.blueprints.data.blueprint, url_prefix="/data")


Expand Down
5 changes: 3 additions & 2 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,9 @@ def init_multipart_upload(key, expires_in=None, bucket=None):
Returns:
uploadId(str)
"""
bucket = bucket or flask.current_app.config["DATA_UPLOAD_BUCKET"]
if not bucket:
try:
bucket = bucket or flask.current_app.config["DATA_UPLOAD_BUCKET"]
except KeyError:
raise InternalError(
"fence not configured with data upload bucket; can't create signed URL"
)
Expand Down
32 changes: 27 additions & 5 deletions fence/resources/aws/boto_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,34 @@ class BotoManager(object):
900 # minimum time for aws assume role is 900 seconds as per boto docs
)

# Build the STS client and the default S3 client, plus one S3 client per
# bucket via create_s3_clients, and store the logger.
# NOTE(review): this is a rendered diff hunk with indentation stripped. The
# first `def __init__(self, config, logger)` and the two `**config` lines
# below it look like the pre-change version that the second definition
# replaces -- confirm against the repository.
def __init__(self, config, logger):
self.sts_client = client("sts", **config)
self.s3_client = client("s3", **config)
def __init__(self, config, buckets, logger):
# the first credential set in `config` is used for the sts client and the
# default s3 client
default = list(config.values())[0]
self.sts_client = client("sts", **default)
self.s3_client = client("s3", **default)
# per-bucket clients, keyed by bucket name
self.s3_clients = self.create_s3_clients(config, buckets)
self.logger = logger
self.ec2 = None
self.iam = None

def create_s3_clients(self, config, buckets):
    """
    Build one S3 client per configured bucket.

    Args:
        config (dict): credential sets keyed by credential name; the
            selected set is passed to ``client`` as keyword arguments
        buckets (dict): bucket settings keyed by bucket name; each entry
            has a 'cred' key (a credential name, or '*' for no explicit
            credentials) and may have an 'endpoint_url' key

    Returns:
        dict: bucket name -> S3 client
    """
    clients_by_bucket = {}
    for bucket_name, bucket_info in buckets.items():
        cred_name = bucket_info['cred']
        # '*' means no explicit credentials are passed for this bucket
        client_kwargs = config[cred_name] if cred_name != '*' else {}

        if 'endpoint_url' not in bucket_info:
            clients_by_bucket[bucket_name] = client('s3', **client_kwargs)
            continue

        custom_endpoint = bucket_info['endpoint_url']
        clients_by_bucket[bucket_name] = client(
            's3', **client_kwargs, endpoint_url=custom_endpoint
        )
    return clients_by_bucket

def get_s3_client(self, bucket):
    """
    Return the S3 client to use for ``bucket``.

    Args:
        bucket (str): bucket name, as used for the keys of ``self.s3_clients``

    Returns:
        the client registered for ``bucket`` in ``self.s3_clients``, or the
        default ``self.s3_client`` when the bucket has no dedicated client
    """
    # ``self.s3_clients`` is a dict keyed by bucket name, so indexing it with
    # ``0`` (the previous fallback) raised KeyError for every unknown bucket.
    # Fall back to the default client instead.
    bucket_client = self.s3_clients.get(bucket)
    if bucket_client is None:
        return self.s3_client
    return bucket_client

def delete_data_file(self, bucket, prefix):
"""
We use buckets with versioning disabled.
Expand All @@ -33,7 +54,8 @@ def delete_data_file(self, bucket, prefix):
https://docs.aws.amazon.com/AmazonS3/latest/dev/DeletingObjectsfromVersioningSuspendedBuckets.html
"""
try:
s3_objects = self.s3_client.list_objects_v2(
s3_client = self.get_s3_client(bucket)
s3_objects = s3_client.list_objects_v2(
Bucket=bucket, Prefix=prefix, Delimiter="/"
)

Expand All @@ -52,7 +74,7 @@ def delete_data_file(self, bucket, prefix):
self.logger.error("multiple files found with prefix {}".format(prefix))
return ("Multiple files found matching this prefix. Backing off.", 400)
key = s3_objects["Contents"][0]["Key"]
self.s3_client.delete_object(Bucket=bucket, Key=key)
s3_client.delete_object(Bucket=bucket, Key=key)
self.logger.info(
"deleted file for prefix {} in bucket {}".format(prefix, bucket)
)
Expand Down
106 changes: 106 additions & 0 deletions tests/data/test_boto_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import pytest
from unittest.mock import MagicMock, patch
from fence.resources.aws.boto_manager import BotoManager


class TestData:
    """Generate bucket test data that aims to mirror the default example Fence config file."""

    # The class name matches pytest's default ``Test*`` collection pattern.
    # Opting out explicitly avoids the collection warning pytest emits for a
    # "test class" that defines ``__init__``.
    __test__ = False

    def __init__(self):
        self.config = {}
        self.buckets = {}

    def single_bucket(self):
        """Populate one credential set and one bucket using it; return self."""
        self.config = {
            'CRED1': {'access_key': 'key1', 'secret_key': 'secret1'},
        }
        self.buckets = {
            'bucket1': {'cred': 'CRED1', 'region': 'us-east-1', 'endpoint_url': 'https://example.com'},
        }
        return self

    def multiple_buckets(self):
        """Extend the single-bucket data with a second credential set and three more buckets; return self."""
        # single_bucket() fills in self.config / self.buckets in place
        self.single_bucket()
        self.config = self.config | {
            'CRED2': {'access_key': 'key2', 'secret_key': 'secret2'},
        }
        self.buckets = self.buckets | {
            'bucket2': {'cred': 'CRED2', 'region': 'us-east-1'},
            'bucket3': {'cred': '*'},
            'bucket4': {'cred': 'CRED1', 'region': 'us-east-1', 'role-arn': 'arn:aws:iam::role1'},
        }
        return self


@patch('fence.resources.aws.boto_manager.client')
def test_create_s3_client_single(mock_client):
    """With one configured bucket, exactly one S3 client is created, using its creds and endpoint."""
    fixture = TestData().single_bucket()
    manager = BotoManager(fixture.config, fixture.buckets, MagicMock())

    clients_by_bucket = manager.create_s3_clients(fixture.config, fixture.buckets)

    # the bucket's credential set and its custom endpoint are both forwarded
    mock_client.assert_any_call(
        's3',
        access_key='key1',
        secret_key='secret1',
        endpoint_url='https://example.com',
    )

    # only the configured bucket gets an entry
    assert len(clients_by_bucket) == 1
    assert 'bucket1' in clients_by_bucket


@patch('fence.resources.aws.boto_manager.client')
def test_create_s3_clients_multiple(mock_client):
    """Each configured bucket gets its own S3 client, built from that bucket's settings."""
    fixture = TestData().multiple_buckets()
    manager = BotoManager(fixture.config, fixture.buckets, MagicMock())

    clients_by_bucket = manager.create_s3_clients(fixture.config, fixture.buckets)

    # bucket1: CRED1 with a custom endpoint
    mock_client.assert_any_call(
        's3',
        access_key='key1',
        secret_key='secret1',
        endpoint_url='https://example.com',
    )
    # bucket2: CRED2, no endpoint
    mock_client.assert_any_call('s3', access_key='key2', secret_key='secret2')
    # bucket3: cred '*' -> no explicit credentials
    mock_client.assert_any_call('s3')
    # bucket4: CRED1, no endpoint
    mock_client.assert_any_call('s3', access_key='key1', secret_key='secret1')

    # one entry per configured bucket
    assert len(clients_by_bucket) == 4
    assert 'bucket1' in clients_by_bucket
    assert 'bucket2' in clients_by_bucket
    assert 'bucket3' in clients_by_bucket
    assert 'bucket4' in clients_by_bucket


@pytest.mark.parametrize("bucket", ['bucket1', 'bucket2', 'bucket3', 'bucket4'])
@patch('fence.resources.aws.boto_manager.client')
def test_delete_data_file(mock_client, bucket):
    """Deleting a file lists and deletes through the bucket's S3 client and returns ("", 204)."""
    fixture = TestData().multiple_buckets()
    manager = BotoManager(fixture.config, fixture.buckets, MagicMock())

    # list_objects_v2 reports exactly one object, whose key equals the prefix
    object_prefix = 'data/file.txt'
    patched_s3 = mock_client.return_value
    patched_s3.list_objects_v2.return_value = {'Contents': [{'Key': object_prefix}]}

    outcome = manager.delete_data_file(bucket, object_prefix)

    # Create S3 clients for each of the buckets, then look up this bucket's client
    manager.create_s3_clients(fixture.config, fixture.buckets)
    bucket_client = manager.get_s3_client(bucket)
    bucket_client.list_objects_v2.assert_called_once_with(
        Bucket=bucket, Prefix=object_prefix, Delimiter="/"
    )
    bucket_client.delete_object.assert_called_once_with(
        Bucket=bucket, Key='data/file.txt'
    )

    # Assert the expected result
    assert outcome == ("", 204)