Skip to content

Commit b707052

Browse files
committed
IndirectObject.fully_unwrap infinite recursion guard
Limits unwrapping to a maximum nesting depth, so a malicious PDF cannot keep us stuck indefinitely. Also introduces unit tests.
1 parent ea85c75 commit b707052

File tree

2 files changed

+78
-2
lines changed

2 files changed

+78
-2
lines changed

pypdf/generic/_base.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
__author__ = "Mathieu Fenniak"
4949
__author_email__ = "biziqe@mathieu.fenniak.net"
50+
# Depth limit checked by IndirectObject.fully_unwrap: once the unwrap depth exceeds this value, PdfReadError is
# raised instead of following further IndirectObject layers.
MAX_INDIRECT_OBJECT_NESTING_DEPTH = 10
5051

5152

5253
class PdfObject(PdfObjectProtocol):
@@ -292,8 +293,14 @@ def fully_unwrap(obj: Optional["PdfObject"]) -> Optional["PdfObject"]:
292293
Given a PdfObject that may be an IndirectObject, recursively unwrap that IndirectObject until a None or
293294
PdfObject that is not an IndirectObject is returned.
294295
"""
295-
if isinstance(obj, IndirectObject):
296-
return IndirectObject.fully_unwrap(obj.get_object())
296+
depth = 0
297+
while isinstance(obj, IndirectObject):
298+
if depth > MAX_INDIRECT_OBJECT_NESTING_DEPTH:
299+
raise PdfReadError(
300+
"IndirectObject nested too deep. If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
301+
)
302+
depth += 1
303+
obj = obj.get_object()
297304
return obj
298305

299306
def __repr__(self) -> str:

tests/test_generic.py

+69
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Test the pypdf.generic module."""
22
from io import BytesIO
33
from pathlib import Path
4+
from typing import Optional
45
from unittest.mock import patch
56

67
import pytest
@@ -1058,6 +1059,74 @@ def test_indirect_object_page_dimensions():
10581059
assert mediabox == RectangleObject((0, 0, 792, 612))
10591060

10601061

1062+
def test_indirect_object_fully_unwrap():
1063+
# Maps an IndirectObject's idnum to the object that FakePdf.get_object hands back (wrapped) for it.
unwrapping = {}
1064+
expected_result = NumberObject(123)
1065+
1066+
# Wrapper whose get_object() simply returns the value it was constructed with.
# NOTE(review): presumably needed because IndirectObject.get_object calls .get_object() on whatever the pdf
# returns - confirm against IndirectObject.get_object.
class FakeGetObjectReturn:
1067+
"""
1068+
Fake class to allow the IndirectObject to resolve its underlying object
1069+
"""
1070+
def __init__(self, result):
1071+
self.result = result
1072+
1073+
def get_object(self) -> Optional[PdfObject]:
1074+
return self.result
1075+
1076+
# Stand-in for the pdf passed to IndirectObject: looks the reference's idnum up in `unwrapping`.
class FakePdf:
1077+
"""
1078+
Fake class to allow the IndirectObject to resolve its underlying object
1079+
"""
1080+
def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
1081+
return FakeGetObjectReturn(unwrapping[indirect_reference.idnum])
1082+
1083+
fake_pdf = FakePdf()
1084+
# Now we set up two layers of indirection: indirect_object0 resolves to indirect_object1, which in turn
1085+
# resolves to expected_result
1086+
indirect_object0 = IndirectObject(0, 0, fake_pdf)
1087+
indirect_object1 = IndirectObject(1, 0, fake_pdf)
1088+
unwrapping[0] = indirect_object1
1089+
unwrapping[1] = expected_result
1090+
1091+
# Confirm our setup is correct
1092+
assert indirect_object0.get_object() == indirect_object1
1093+
assert indirect_object1.get_object() == expected_result
1094+
1095+
# And test: fully_unwrap must follow both layers and return the final, non-indirect object
1096+
assert IndirectObject.fully_unwrap(indirect_object0) == expected_result
1097+
1098+
1099+
def test_indirect_object_fully_unwrap_depth_limit():
1100+
1101+
# Wrapper whose get_object() simply returns the value it was constructed with.
class FakeGetObjectReturn:
1102+
"""
1103+
Fake class to allow the IndirectObject to resolve its underlying object
1104+
"""
1105+
def __init__(self, result):
1106+
self.result = result
1107+
1108+
def get_object(self) -> Optional[PdfObject]:
1109+
return self.result
1110+
1111+
class FakePdf:
1112+
"""
1113+
Fake class to allow the IndirectObject to resolve its underlying object. This version returns the IndirectObject
1114+
that is passed in, triggering our guard against indefinite recursion.
1115+
"""
1116+
def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
1117+
return FakeGetObjectReturn(indirect_reference)
1118+
1119+
fake_pdf = FakePdf()
1120+
1121+
# With this FakePdf the reference is handed back as its own target, so unwrapping never reaches a plain object.
indirect_object = IndirectObject(0, 0, fake_pdf)
1122+
1123+
# And test: the depth guard must raise PdfReadError with the documented message
1124+
with pytest.raises(PdfReadError) as exc:
1125+
IndirectObject.fully_unwrap(indirect_object)
1126+
assert exc.value.args[0] == \
1127+
"IndirectObject nested too deep. If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
1128+
1129+
10611130
def test_indirect_object_invalid_read():
10621131
stream = BytesIO(b"0 1 s")
10631132
with pytest.raises(PdfReadError) as exc:

0 commit comments

Comments
 (0)