Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

MAINT: Simplify file identifiers generation #100

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
22 changes: 10 additions & 12 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import enum
import hashlib
import re
import time
import uuid
from io import BytesIO, FileIO, IOBase
from pathlib import Path
Expand Down Expand Up @@ -136,13 +137,6 @@ class ObjectDeletionFlag(enum.IntFlag):
IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES


def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str:
hash = hashlib.md5()
for block in iter(lambda: stream.read(blocksize), b""):
hash.update(block)
return hash.hexdigest()


class PdfWriter(PdfDocCommon):
"""
Write a PDF file out, given pages produced by another class or through
Expand Down Expand Up @@ -1152,10 +1146,14 @@ def clone_document_from_reader(
after_page_append(page.get_object())

def _compute_document_identifier(self) -> ByteStringObject:
# Build one identifier value for the document: an MD5 hex digest, encoded to
# bytes and wrapped in a ByteStringObject.
#
# NOTE(review): this is a diff view whose +/- markers were lost, so two bodies
# are interleaved below. Going by the hunk header (-1152,10 +1146,14), the next
# four lines look like the removed implementation and the lines after them
# like the added one -- confirm against the real diff.
#
# Apparent old body: write the PDF structure into an in-memory buffer, rewind
# it, and hash those bytes with _rolling_checksum.
stream = BytesIO()
self._write_pdf_structure(stream)
stream.seek(0)
return ByteStringObject(_rolling_checksum(stream).encode("utf8"))
# Apparent new body: hash a few writer attributes; _write_pdf_structure is
# not called on this path.
md5 = hashlib.md5()
# The current timestamp is mixed in, so the digest depends on when this runs.
md5.update(str(time.time()).encode("utf-8"))
md5.update(str(self.fileobj).encode("utf-8"))
# Number of entries in self._objects.
md5.update(str(len(self._objects)).encode("utf-8"))
# The info dictionary is only mixed in when the attribute exists.
if hasattr(self, "_info"):
for k, v in cast(DictionaryObject, self._info.get_object()).items():
md5.update(f"{k}={v}".encode())
return ByteStringObject(md5.hexdigest().encode("utf-8"))

def generate_file_identifiers(self) -> None:
"""
Expand All @@ -1174,7 +1172,7 @@ def generate_file_identifiers(self) -> None:
id2 = self._compute_document_identifier()
else:
id1 = self._compute_document_identifier()
id2 = id1
id2 = ByteStringObject(id1.original_bytes)
self._ID = ArrayObject((id1, id2))

def encrypt(
Expand Down