From 8f97e908b7fd7bffd1fb40d00469cc7f79e95628 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 1 Dec 2023 10:09:37 -0600 Subject: [PATCH 1/5] feat(metrics): add log for each signed url with metadata for metrics tracking --- fence/blueprints/data/indexd.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 94d5eb26a..066250b8b 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -159,9 +159,35 @@ def get_signed_url_for_file( "sub": authorized_user_from_passport.id, } + _log_signed_url_data_info(indexed_file.index_document) + return {"url": signed_url} +def _log_signed_url_data_info(index_document): + size = index_document.get("size") + acl = index_document.get("acl") + authz = index_document.get("authz") + buckets = set() + + for url in index_document.get("urls", []): + bucket_name = None + if "://" in url: + # Extract the protocol and the rest of the URL + protocol, rest_of_url = url.split("://", 1) + + # Extract bucket name + bucket_name = rest_of_url.split("/")[0] + + buckets.add(bucket_name) + + buckets_formatted = ",".join(buckets) + + logger.info( + f"Signed URL Generated. size={size} acl={acl} authz={authz} buckets={buckets_formatted}" + ) + + def prepare_presigned_url_audit_log(protocol, indexed_file): """ Store in `flask.g.audit_data` the data needed to record an audit log. From 2718cf5af3ff15c4c88981ac7ff037f057c999e9 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 1 Dec 2023 10:12:20 -0600 Subject: [PATCH 2/5] feat(metrics): add user sub to log --- fence/blueprints/data/indexd.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 066250b8b..c1a8f07b1 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -159,12 +159,15 @@ def get_signed_url_for_file( "sub": authorized_user_from_passport.id, } - _log_signed_url_data_info(indexed_file.index_document) + _log_signed_url_data_info( + index_document=indexed_file.index_document, + user_sub=flask.g.audit_data.get("sub", "") + ) return {"url": signed_url} -def _log_signed_url_data_info(index_document): +def _log_signed_url_data_info(index_document, user_sub): size = index_document.get("size") acl = index_document.get("acl") authz = index_document.get("authz") @@ -184,7 +187,7 @@ def _log_signed_url_data_info(index_document): buckets_formatted = ",".join(buckets) logger.info( - f"Signed URL Generated. size={size} acl={acl} authz={authz} buckets={buckets_formatted}" + f"Signed URL Generated. size={size} acl={acl} authz={authz} buckets={buckets_formatted} user_sub={user_sub}" ) From fc1b08d0a927191f3447f32243cc0da2053e36a3 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 1 Dec 2023 13:33:30 -0600 Subject: [PATCH 3/5] fix(metrics): reduce value for size to stay within limits of metrics gathering --- fence/blueprints/data/indexd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index c1a8f07b1..36dd44689 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -168,7 +168,7 @@ def get_signed_url_for_file( def _log_signed_url_data_info(index_document, user_sub): - size = index_document.get("size") + size_in_kibibytes = index_document.get("size", 0) / 1024 acl = index_document.get("acl") authz = index_document.get("authz") buckets = set() @@ -187,7 +187,7 @@ def _log_signed_url_data_info(index_document, user_sub): buckets_formatted = ",".join(buckets) logger.info( - f"Signed URL Generated. size={size} acl={acl} authz={authz} buckets={buckets_formatted} user_sub={user_sub}" + f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} acl={acl} authz={authz} buckets={buckets_formatted} user_sub={user_sub}" ) From 51f9340a18efe99e46d2c3b3fb9ff27393465229 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 1 Dec 2023 15:05:03 -0600 Subject: [PATCH 4/5] fix(metrics): only log the bucket for the protocol used --- fence/blueprints/data/indexd.py | 35 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 36dd44689..253a1ad64 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -160,34 +160,37 @@ def get_signed_url_for_file( } _log_signed_url_data_info( - index_document=indexed_file.index_document, - user_sub=flask.g.audit_data.get("sub", "") + indexed_file=indexed_file, + user_sub=flask.g.audit_data.get("sub", ""), + requested_protocol=requested_protocol ) return {"url": signed_url} -def _log_signed_url_data_info(index_document, user_sub): - size_in_kibibytes = index_document.get("size", 0) / 1024 - acl = index_document.get("acl") - authz = index_document.get("authz") - buckets = set() +def _log_signed_url_data_info(indexed_file, user_sub, requested_protocol): + size_in_kibibytes = indexed_file.index_document.get("size", 0) / 1024 + acl = indexed_file.index_document.get("acl") + authz = indexed_file.index_document.get("authz") - for url in index_document.get("urls", []): + # the behavior later on is to pick the 1st location as the signed URL if a protocol is not requested + protocol = requested_protocol or indexed_file.indexed_file_locations[0].protocol + + # figure out which bucket was used based on the protocol + bucket = "" + for url in indexed_file.index_document.get("urls", []): bucket_name = None if "://" in url: # Extract the protocol and the rest of the URL - protocol, rest_of_url = url.split("://", 1) - - # Extract bucket name - bucket_name = rest_of_url.split("/")[0] + bucket_protocol, rest_of_url = url.split("://", 1) - buckets.add(bucket_name) - - buckets_formatted = ",".join(buckets) + if bucket_protocol == protocol: + # Extract bucket name + bucket = rest_of_url.split("/")[0] + break logger.info( - f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} acl={acl} authz={authz} buckets={buckets_formatted} user_sub={user_sub}" + f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} acl={acl} authz={authz} bucket={bucket} user_sub={user_sub}" ) From d9f9aac95e2c46d42916461986d9d438dfdcdc91 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 1 Dec 2023 15:07:05 -0600 Subject: [PATCH 5/5] fix(metrics): add protocol back to bucket name --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 253a1ad64..00bf8436d 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -186,7 +186,7 @@ def _log_signed_url_data_info(indexed_file, user_sub, requested_protocol): if bucket_protocol == protocol: # Extract bucket name - bucket = rest_of_url.split("/")[0] + bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" break logger.info(