diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index a657cd473..2ea21f5b2 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -40,6 +40,7 @@
     Iterable,
     List,
     Optional,
+    Set,
     Tuple,
     Type,
     Union,
@@ -133,6 +134,7 @@ def __init__(
         self._validated_root: Optional[DictionaryObject] = None
 
         self._initialize_stream(stream)
+        self._known_objects: Set[Tuple[int, int]] = set()
 
         self._override_encryption = False
         self._encryption: Optional[Encryption] = None
@@ -447,7 +449,17 @@ def get_object(
                 )
             if self.strict:
                 assert generation == indirect_reference.generation
-            retval = read_object(self.stream, self)  # type: ignore
+
+            current_object = (indirect_reference.idnum, indirect_reference.generation)
+            if current_object in self._known_objects:
+                raise PdfReadError(f"Detected loop with self reference for {indirect_reference!r}.")
+            self._known_objects.add(current_object)
+            try:
+                retval = read_object(self.stream, self)  # type: ignore
+            finally:
+                # Always clear the in-progress marker: if reading fails, a stale
+                # entry would make every later lookup of this object look like a loop.
+                self._known_objects.discard(current_object)
 
             # override encryption is used for the /Encrypt dictionary
             if not self._override_encryption and self._encryption is not None:
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 0cfc306a0..273729498 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1806,3 +1806,17 @@ def test_issue2886(caplog):
 
     with pytest.raises(PdfReadError, match="Unexpected empty line in Xref table."):
         _ = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+
+@pytest.mark.enable_socket
+def test_infinite_loop_for_length_value():
+    """Tests for #3112"""
+    url = "https://github.com/user-attachments/files/19106009/Special.n.15.du.jeudi.22.fevrier.2024.pdf"
+    name = "issue3112.pdf"
+
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    with pytest.raises(PdfReadError, match=r"^Detected loop with self reference for IndirectObject\(165, 0, \d+\)\.$"):
+        writer.add_page(reader.pages[0])
+    # The in-progress markers must be cleared even though reading failed.
+    assert reader._known_objects == set()