From 71c81ecb01d7e8d2ed74a3dc8fafcd732ce0c06f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 12:31:44 -0700 Subject: [PATCH 1/6] Adjust logging level from 'error' to 'warning' in '_verify_object_information' --- src/hashstore/filehashstore.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index 63ad485c..b52257a4 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -1701,10 +1701,10 @@ def _verify_object_information( exception_string + f" Tmp file deleted and file not stored for pid: {pid}" ) - logging.error(exception_string_for_pid) + logging.warning(exception_string_for_pid) raise ValueError(exception_string_for_pid) else: - logging.error(exception_string) + logging.warning(exception_string) raise ValueError(exception_string) if checksum_algorithm is not None and checksum is not None: if checksum_algorithm not in hex_digests: @@ -1712,7 +1712,7 @@ def _verify_object_information( "FileHashStore - _verify_object_information: checksum_algorithm" + f" ({checksum_algorithm}) cannot be found in the hex digests dictionary." ) - logging.error(exception_string) + logging.warning(exception_string) raise KeyError(exception_string) else: hex_digest_stored = hex_digests[checksum_algorithm] @@ -1729,14 +1729,14 @@ def _verify_object_information( exception_string_for_pid = ( exception_string + f" Tmp file ({tmp_file_name}) deleted." ) - logging.error(exception_string_for_pid) + logging.warning(exception_string_for_pid) raise ValueError(exception_string_for_pid) else: # Delete the object cid = hex_digests[self.algorithm] cid_abs_path = self._resolve_path("cid", cid) self._delete(entity, cid_abs_path) - logging.error(exception_string) + logging.warning(exception_string) raise ValueError(exception_string) def _verify_hashstore_references(self, pid, cid, additional_log_string): From d6b40792fc6a6bb8d8865891e4bc59a9cc483d66 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 13:57:14 -0700 Subject: [PATCH 2/6] Add new custom exception 'PidObjectMetadataError', revise logging levels, update 'store_object' to accurately relay cause of error, and update pytest --- src/hashstore/filehashstore.py | 37 +++++++++++++++++++-------- tests/test_filehashstore_interface.py | 3 ++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index b52257a4..89974d47 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -477,10 +477,18 @@ def store_object( "FileHashStore - store_object: Successfully stored object for pid: %s", pid, ) + except ObjectMetadataError as ome: + # Note, using '.__cause__' allows the original exception msg to be displayed + exception_string = ( + f"FileHashStore - store_object: failed to store object for pid: {pid}." + + f" Reference files will not be created or tagged. {ome.__cause__}" + ) + logging.error(exception_string) + raise ome except Exception as err: exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." - + f" Unexpected {err=}, {type(err)=}" + + f" Unexpected error: {err.__cause__}" ) logging.error(exception_string) raise err @@ -1343,11 +1351,11 @@ def _move_and_get_checksums( except ValueError as ve: # If any exception is thrown during validation, exception_string = ( - "FileHashStore - _move_and_get_checksums: Object exists but cannot be verified" - + f" (validation error): {abs_file_path}, deleting temporary file. Error: {ve}" + f"FileHashStore - _move_and_get_checksums: Object already exists for pid: {pid}" + + " , deleting temp file. Ref files will not be created/tagged." ) - logging.error(exception_string) - raise ValueError from ve + logging.warning(exception_string) + raise PidObjectMetadataError(exception_string) from ve finally: # Delete the temporary file, it already exists so it is redundant self._delete(entity, tmp_file_name) @@ -1701,10 +1709,10 @@ def _verify_object_information( exception_string + f" Tmp file deleted and file not stored for pid: {pid}" ) - logging.warning(exception_string_for_pid) + logging.debug(exception_string_for_pid) raise ValueError(exception_string_for_pid) else: - logging.warning(exception_string) + logging.debug(exception_string) raise ValueError(exception_string) if checksum_algorithm is not None and checksum is not None: if checksum_algorithm not in hex_digests: @@ -1712,7 +1720,7 @@ def _verify_object_information( "FileHashStore - _verify_object_information: checksum_algorithm" + f" ({checksum_algorithm}) cannot be found in the hex digests dictionary." ) - logging.warning(exception_string) + logging.debug(exception_string) raise KeyError(exception_string) else: hex_digest_stored = hex_digests[checksum_algorithm] @@ -1729,14 +1737,14 @@ def _verify_object_information( exception_string_for_pid = ( exception_string + f" Tmp file ({tmp_file_name}) deleted." ) - logging.warning(exception_string_for_pid) + logging.debug(exception_string_for_pid) raise ValueError(exception_string_for_pid) else: # Delete the object cid = hex_digests[self.algorithm] cid_abs_path = self._resolve_path("cid", cid) self._delete(entity, cid_abs_path) - logging.warning(exception_string) + logging.debug(exception_string) raise ValueError(exception_string) def _verify_hashstore_references(self, pid, cid, additional_log_string): @@ -2319,6 +2327,15 @@ def close(self): self._obj.seek(self._pos) +class PidObjectMetadataError(Exception): + """Custom exception thrown when an object cannot be verified due + to an error with the metadata provided to validate against.""" + + def __init__(self, message, errors=None): + super().__init__(message) + self.errors = errors + + class PidRefsDoesNotExist(Exception): """Custom exception thrown when a pid refs file does not exist.""" diff --git a/tests/test_filehashstore_interface.py b/tests/test_filehashstore_interface.py index af76998a..54b24788 100644 --- a/tests/test_filehashstore_interface.py +++ b/tests/test_filehashstore_interface.py @@ -11,6 +11,7 @@ from hashstore.filehashstore import ( CidRefsDoesNotExist, + PidObjectMetadataError, PidNotFoundInCidRefsFile, PidRefsDoesNotExist, RefsFileExistsButCidObjMissing, @@ -420,7 +421,7 @@ def test_store_object_duplicate_raises_error_with_bad_validation_data(pids, stor # Store first blob _object_metadata_one = store.store_object(pid, path) # Store second blob - with pytest.raises(ValueError): + with pytest.raises(PidObjectMetadataError): _object_metadata_two = store.store_object( pid, path, checksum="nonmatchingchecksum", checksum_algorithm="sha256" ) From bf87fc6d3222dd64b6a7fe5ca36d5f78f5704548 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 14:00:39 -0700 Subject: [PATCH 3/6] Fix typo in exception caught with correct name after revision --- src/hashstore/filehashstore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index 89974d47..dff0a70d 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -477,7 +477,7 @@ def store_object( "FileHashStore - store_object: Successfully stored object for pid: %s", pid, ) - except ObjectMetadataError as ome: + except PidObjectMetadataError as ome: # Note, using '.__cause__' allows the original exception msg to be displayed exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." From a9d17d0e6673e48b6709a76415bdfaaada4b23f5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 14:22:21 -0700 Subject: [PATCH 4/6] Improve logging messaging and revise logging levels in '_move_and_get_checksums' --- src/hashstore/filehashstore.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index dff0a70d..7da86b57 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -1298,11 +1298,11 @@ def _move_and_get_checksums( # Revert storage process exception_string = ( "FileHashStore - _move_and_get_checksums:" - + f" Unexpected {err=}, {type(err)=}" + + f" Unexpected Error: {err}" ) - logging.error(exception_string) + logging.warning(exception_string) if os.path.isfile(abs_file_path): - # Check to see if object has moved successfully before deleting + # Check to see if object exists before determining whether to delete debug_msg = ( "FileHashStore - _move_and_get_checksums: Permanent file" + f" found during exception, checking hex digest for pid: {pid}" @@ -1312,28 +1312,34 @@ def _move_and_get_checksums( if pid_checksum == hex_digests.get(self.algorithm): # If the checksums match, return and log warning exception_string = ( - "FileHashStore - _move_and_get_checksums: File moved" - + f" successfully but unexpected issue encountered: {exception_string}", + "FileHashStore - _move_and_get_checksums: Object exists at:" + + f" {abs_file_path} but an unexpected issue has been encountered." + + " Reference files will not be created and/or tagged." ) - logging.error(exception_string) + logging.warning(exception_string) raise err else: debug_msg = ( - "FileHashStore - _move_and_get_checksums: Permanent file" - + f" found but with incomplete state, deleting file: {abs_file_path}", + "FileHashStore - _move_and_get_checksums: Object exists at" + + f"{abs_file_path} but the pid object checksum provided does not" + + " match what has been calculated. Deleting object. References will" + + " not be created and/or tagged.", ) logging.debug(debug_msg) self._delete(entity, abs_file_path) + raise err + logging.debug( "FileHashStore - _move_and_get_checksums: Deleting temporary file: %s", tmp_file_name, ) self._delete(entity, tmp_file_name) err_msg = ( - "Aborting store_object upload - an unexpected error has occurred when moving" - + f" file to: {object_cid} - Error: {err}" + f"Object has not been stored for pid: {pid} - an unexpected error has occurred" + + f" when moving tmp file to: {object_cid}. Reference files will not be" + + f" created and/or tagged. Error: {err}" ) - logging.error("FileHashStore - _move_and_get_checksums: %s", err_msg) + logging.warning("FileHashStore - _move_and_get_checksums: %s", err_msg) raise else: # If the file exists, determine if the object is what the client states it to be @@ -1352,12 +1358,14 @@ def _move_and_get_checksums( # If any exception is thrown during validation, exception_string = ( f"FileHashStore - _move_and_get_checksums: Object already exists for pid: {pid}" - + " , deleting temp file. Ref files will not be created/tagged." + + " , deleting temp file. Reference files will not be created and/or tagged" + + " due to an issue with the supplied pid object metadata." ) logging.warning(exception_string) raise PidObjectMetadataError(exception_string) from ve finally: # Delete the temporary file, it already exists so it is redundant + # No exception is thrown so 'store_object' can proceed to tag object self._delete(entity, tmp_file_name) return object_cid, tmp_file_size, hex_digests From 7231791edc88386d762b905be17794e43fe4d4eb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 14:27:12 -0700 Subject: [PATCH 5/6] Revise 'store_object' exception messaging --- src/hashstore/filehashstore.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index 7da86b57..e9f437b0 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -481,14 +481,16 @@ def store_object( # Note, using '.__cause__' allows the original exception msg to be displayed exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." - + f" Reference files will not be created or tagged. {ome.__cause__}" + + " Reference files will not be created or tagged. PidObjectMetadataError:" + + ome.__cause__ ) logging.error(exception_string) raise ome except Exception as err: exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." - + f" Unexpected error: {err.__cause__}" + + " Reference files will not be created or tagged. Unexpected error: " + + err.__cause__ ) logging.error(exception_string) raise err From 1231cbc4f66de48aeaeb1415b213eb59140daf6e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 22 May 2024 14:56:43 -0700 Subject: [PATCH 6/6] Further revise logging levels and update 'store_object' exception messaging format --- src/hashstore/filehashstore.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index e9f437b0..47aa548d 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -478,11 +478,10 @@ def store_object( pid, ) except PidObjectMetadataError as ome: - # Note, using '.__cause__' allows the original exception msg to be displayed exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." - + " Reference files will not be created or tagged. PidObjectMetadataError:" - + ome.__cause__ + + " Reference files will not be created or tagged. PidObjectMetadataError: " + + str(ome) ) logging.error(exception_string) raise ome @@ -490,7 +489,7 @@ def store_object( exception_string = ( f"FileHashStore - store_object: failed to store object for pid: {pid}." + " Reference files will not be created or tagged. Unexpected error: " - + err.__cause__ + + str(err) ) logging.error(exception_string) raise err @@ -1361,9 +1360,9 @@ def _move_and_get_checksums( exception_string = ( f"FileHashStore - _move_and_get_checksums: Object already exists for pid: {pid}" + " , deleting temp file. Reference files will not be created and/or tagged" - + " due to an issue with the supplied pid object metadata." + + f" due to an issue with the supplied pid object metadata. {ve}" ) - logging.warning(exception_string) + logging.debug(exception_string) raise PidObjectMetadataError(exception_string) from ve finally: # Delete the temporary file, it already exists so it is redundant