Skip to content

Commit 987c209

Browse files
committed
IndirectObject.fully_unwrap infinite recursion guard
Ensures we only go to a maximum depth, so a malicious PDF cannot get us indefinitely stuck. Also introduces unit tests.
1 parent ea85c75 commit 987c209

File tree

2 files changed

+77
-2
lines changed

2 files changed

+77
-2
lines changed

pypdf/generic/_base.py

+10-2
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747

4848
__author__ = "Mathieu Fenniak"
4949
__author_email__ = "biziqe@mathieu.fenniak.net"
50+
MAX_INDIRECT_OBJECT_NESTING_DEPTH = 10
5051

5152

5253
class PdfObject(PdfObjectProtocol):
@@ -292,8 +293,15 @@ def fully_unwrap(obj: Optional["PdfObject"]) -> Optional["PdfObject"]:
292293
Given a PdfObject that may be an IndirectObject, recursively unwrap that IndirectObject until a None or
293294
PdfObject that is not an IndirectObject is returned.
294295
"""
295-
if isinstance(obj, IndirectObject):
296-
return IndirectObject.fully_unwrap(obj.get_object())
296+
depth = 0
297+
while isinstance(obj, IndirectObject):
298+
if depth > MAX_INDIRECT_OBJECT_NESTING_DEPTH:
299+
raise PdfReadError(
300+
"IndirectObject nested too deep. "
301+
"If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
302+
)
303+
depth += 1
304+
obj = obj.get_object()
297305
return obj
298306

299307
def __repr__(self) -> str:

tests/test_generic.py

+67
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Test the pypdf.generic module."""
22
from io import BytesIO
33
from pathlib import Path
4+
from typing import Optional
45
from unittest.mock import patch
56

67
import pytest
@@ -1058,6 +1059,72 @@ def test_indirect_object_page_dimensions():
10581059
assert mediabox == RectangleObject((0, 0, 792, 612))
10591060

10601061

1062+
def test_indirect_object_fully_unwrap():
    """Check that fully_unwrap follows a two-link chain of IndirectObjects down to the final value."""
    target = NumberObject(123)
    # Maps an IndirectObject's idnum to whatever that reference should resolve to.
    resolution_table = {}

    class FakeGetObjectReturn:
        """Holder returned by FakePdf.get_object; its own get_object() hands back the stored value."""

        def __init__(self, result):
            self.result = result

        def get_object(self) -> Optional[PdfObject]:
            return self.result

    class FakePdf:
        """Minimal PDF stand-in that resolves a reference by looking up its idnum in resolution_table."""

        def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
            resolved = resolution_table[indirect_reference.idnum]
            return FakeGetObjectReturn(resolved)

    pdf = FakePdf()

    # Two layers of indirection: `outer` resolves to `inner`, which in turn resolves to `target`.
    outer = IndirectObject(0, 0, pdf)
    inner = IndirectObject(1, 0, pdf)
    resolution_table[0] = inner
    resolution_table[1] = target

    # Sanity-check the fixture one hop at a time before exercising the code under test.
    assert outer.get_object() == inner
    assert inner.get_object() == target

    # The actual check: unwrapping from the outermost reference reaches the plain value.
    assert IndirectObject.fully_unwrap(outer) == target
1095+
1096+
1097+
def test_indirect_object_fully_unwrap_depth_limit():
    """Check that fully_unwrap raises PdfReadError when a reference keeps resolving to itself."""

    class FakeGetObjectReturn:
        """Holder returned by FakePdf.get_object; its own get_object() hands back the stored value."""

        def __init__(self, result):
            self.result = result

        def get_object(self) -> Optional[PdfObject]:
            return self.result

    class FakePdf:
        """
        Minimal PDF stand-in that answers every lookup with the very reference it was asked about,
        so unwrapping never arrives at a non-IndirectObject and the depth guard has to fire.
        """

        def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
            return FakeGetObjectReturn(indirect_reference)

    self_referencing = IndirectObject(0, 0, FakePdf())

    with pytest.raises(PdfReadError) as exc:
        IndirectObject.fully_unwrap(self_referencing)

    # Inspect the captured exception only after the context manager has exited.
    raised_message = exc.value.args[0]
    assert raised_message == \
        "IndirectObject nested too deep. If required, consider increasing MAX_INDIRECT_OBJECT_NESTING_DEPTH."
1126+
1127+
10611128
def test_indirect_object_invalid_read():
10621129
stream = BytesIO(b"0 1 s")
10631130
with pytest.raises(PdfReadError) as exc:

0 commit comments

Comments
 (0)