From fcec590ef789c0ffcf211711b199fb9cb17d0c16 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 23 May 2024 13:42:12 -0700 Subject: [PATCH 1/3] Add additional logging statements to 'tag_object', 'find_object' and '_update_refs_file' --- src/hashstore/filehashstore.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index 9e5bdfdf..988cf22e 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -602,6 +602,12 @@ def tag_object(self, pid, cid): self._verify_hashstore_references( pid, cid, "Created missing cid refs file" ) + info_msg = ( + f"FileHashStore - tag_object: pid refs file exists for pid: {pid}" + + f", with the expected cid: {cid} - but cid refs file is missing." + + " Cid refs file created, tagged and verified." + ) + logging.info(info_msg) return True else: # Check if the retrieved cid refs file exists and pid is referenced @@ -694,6 +700,7 @@ def find_object(self, pid): + pid_ref_abs_path + f", but object referenced does not exist, cid: {pid_refs_cid}" ) + logging.error(err_msg) raise RefsFileExistsButCidObjMissing(err_msg) else: return pid_refs_cid @@ -720,6 +727,7 @@ def find_object(self, pid): f"FileHashStore - find_object: pid refs file not found for pid ({pid}): " + pid_ref_abs_path ) + logging.error(err_msg) raise PidRefsDoesNotExist(err_msg) def store_metadata(self, pid, metadata, format_id=None): @@ -1571,6 +1579,11 @@ def _update_refs_file(self, refs_file_path, ref_id, update_type): ref_file.seek(0) ref_file.writelines(new_pid_lines) ref_file.truncate() + debug_msg = ( + f"FileHashStore - _update_refs_file: Update ({update_type}) for ref_id: {ref_id}" + + f" completed on refs file: {refs_file_path}." 
+ ) + logging.debug(debug_msg) except Exception as err: exception_string = ( f"FileHashStore - _update_refs_file: failed to {update_type} for ref_id: {ref_id}" From a520a5f16c5d9c97861a20fad8d68a3b28174a1b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 24 May 2024 10:16:51 -0700 Subject: [PATCH 2/3] Fix inaccurate logging msg in 'delete_object' and typos --- src/hashstore/filehashstore.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index 988cf22e..f2c6ec84 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -554,7 +554,7 @@ def verify_object( def tag_object(self, pid, cid): logging.debug( - "FileHashStore - tag_object: Tagging object cid: {%s} with pid: {%s}.", + "FileHashStore - tag_object: Tagging object cid: %s with pid: %s.", cid, pid, ) @@ -892,7 +892,7 @@ def delete_object(self, ab_id, id_type=None): # Modify object_locked_pids consecutively with self.object_lock: logging.debug( - "FileHashStore - store_object: Adding pid: %s to object_locked_pids.", + "FileHashStore - delete_object: Adding pid: %s to object_locked_pids.", pid, ) self.object_locked_pids.append(pid) From 3368bec0f4dd95fa46a8bf94a9ee4d98500f1d95 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 24 May 2024 13:54:11 -0700 Subject: [PATCH 3/3] Further revise and add logging statements to 'store_object' and 'tag_object' process and related methods --- src/hashstore/filehashstore.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py index f2c6ec84..1708d860 100644 --- a/src/hashstore/filehashstore.py +++ b/src/hashstore/filehashstore.py @@ -474,6 +474,10 @@ def store_object( checksum_algorithm=checksum_algorithm_checked, file_size_to_validate=expected_object_size, ) + logging.debug( + "FileHashStore - store_object: Attempting to tag object for pid: %s", + pid, + ) 
self.tag_object(pid, object_metadata.cid) logging.info( "FileHashStore - store_object: Successfully stored object for pid: %s", @@ -589,6 +593,12 @@ def tag_object(self, pid, cid): ) return True elif pid_ref_abs_path_exists and not cid_ref_abs_path_exists: + debug_msg = ( + f"FileHashStore - tag_object: pid refs file exists ({pid_ref_abs_path})" + + f" for pid: {pid}, but cid refs file doesn't at: {cid_ref_abs_path}" + + f" for cid: {cid}" + ) + logging.debug(debug_msg) # A pid reference file can only contain and reference one cid # First, confirm that the expected cid refs file exists by getting the cid with open(pid_ref_abs_path, "r", encoding="utf8") as pid_ref_file: @@ -629,6 +639,11 @@ def tag_object(self, pid, cid): # but doesn't contain the cid. Proceed to overwrite the pid refs file. # There is no return statement, so we move out of this if block. elif not pid_ref_abs_path_exists and cid_ref_abs_path_exists: + debug_msg = ( + f"FileHashStore - tag_object: pid refs file does not exists for pid {pid}" + + f" but cid refs file exists at: {cid_ref_abs_path} for cid: {cid}" + ) + logging.debug(debug_msg) # Create the pid refs file pid_tmp_file_path = self._write_refs_file(tmp_root_path, cid, "pid") self._create_path(os.path.dirname(pid_ref_abs_path)) @@ -660,7 +675,8 @@ def tag_object(self, pid, cid): shutil.move(cid_tmp_file_path, cid_ref_abs_path) # Ensure that the reference files have been written as expected # If there is an issue, client or user will have to manually review - self._verify_hashstore_references(pid, cid, "Created all refs files") + log_msg = "Reference files have been moved to their permanent location." 
+ self._verify_hashstore_references(pid, cid, log_msg) logging.info( "FileHashStore - tag_object: Successfully tagged cid: %s with pid %s", cid, @@ -932,6 +948,11 @@ def delete_object(self, ab_id, id_type=None): self._update_refs_file(cid_ref_abs_path, pid, "remove") # Delete cid reference file and object only if the cid refs file is empty if os.path.getsize(cid_ref_abs_path) == 0: + debug_msg = ( + "FileHashStore - delete_object: cid_refs_file is empty (size == 0):" + + f" {cid_ref_abs_path} - deleting cid refs file and data object." + ) + logging.debug(debug_msg) objects_to_delete.append( self._rename_path_for_deletion(cid_ref_abs_path) ) @@ -1507,7 +1528,7 @@ def _write_refs_file(self, path, ref_id, ref_type): difference being that a cid reference file can potentially contain multiple lines of `pid`s that reference the `cid`. - :param str path: Directory to write the temporary file + :param str path: Directory to write a temporary file into :param str ref_id: Authority-based, persistent or content identifier :param str ref_type: 'cid' or 'pid' @@ -1515,7 +1536,7 @@ def _write_refs_file(self, path, ref_id, ref_type): :rtype: string """ logging.debug( - "FileHashStore - write_cid_refs_file: Writing id (%s) into file: %s", + "FileHashStore - _write_refs_file: Writing id (%s) into a tmp file in: %s", ref_id, path, ) @@ -1779,6 +1800,11 @@ def _verify_hashstore_references(self, pid, cid, additional_log_string): :param str cid: Content identifier. :param str additional_log_string: String to append to exception statement """ + debug_msg = ( + f"FileHashStore - _verify_hashstore_references: verifying pid ({pid})" + + f" and cid ({cid}) refs files. Additional Note: {additional_log_string}" + ) + logging.debug(debug_msg) # Check that reference files were created pid_ref_abs_path = self._resolve_path("pid", pid) cid_ref_abs_path = self._resolve_path("cid", cid)