Change how PdfWriter._compute_document_identifier builds the file identifier.

The identifier was an MD5 over the PDF structure written to a BytesIO by
_write_pdf_structure, hashed in blocks by the _rolling_checksum helper (removed
here). It is now an MD5 over the current time, str(self.fileobj), the number of
objects and, when _info exists, each key=value pair of the info dictionary.
generate_file_identifiers now stores id2 as a separate ByteStringObject built
from id1.original_bytes instead of reusing the id1 object.

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 00b9d498c..4a003856f 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -32,6 +32,7 @@
 import enum
 import hashlib
 import re
+import time
 import uuid
 from io import BytesIO, FileIO, IOBase
 from pathlib import Path
@@ -136,13 +137,6 @@ class ObjectDeletionFlag(enum.IntFlag):
     IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
 
 
-def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str:
-    hash = hashlib.md5()
-    for block in iter(lambda: stream.read(blocksize), b""):
-        hash.update(block)
-    return hash.hexdigest()
-
-
 class PdfWriter(PdfDocCommon):
     """
     Write a PDF file out, given pages produced by another class or through
@@ -1152,10 +1146,14 @@ def clone_document_from_reader(
                 after_page_append(page.get_object())
 
     def _compute_document_identifier(self) -> ByteStringObject:
-        stream = BytesIO()
-        self._write_pdf_structure(stream)
-        stream.seek(0)
-        return ByteStringObject(_rolling_checksum(stream).encode("utf8"))
+        md5 = hashlib.md5()
+        md5.update(str(time.time()).encode("utf-8"))
+        md5.update(str(self.fileobj).encode("utf-8"))
+        md5.update(str(len(self._objects)).encode("utf-8"))
+        if hasattr(self, "_info"):
+            for k, v in cast(DictionaryObject, self._info.get_object()).items():
+                md5.update(f"{k}={v}".encode())
+        return ByteStringObject(md5.hexdigest().encode("utf-8"))
 
     def generate_file_identifiers(self) -> None:
         """
@@ -1174,7 +1172,7 @@ def generate_file_identifiers(self) -> None:
             id2 = self._compute_document_identifier()
         else:
             id1 = self._compute_document_identifier()
-            id2 = id1
+            id2 = ByteStringObject(id1.original_bytes)
         self._ID = ArrayObject((id1, id2))
 
     def encrypt(