From 883e3408c81f7141d801faab57765a60faa60809 Mon Sep 17 00:00:00 2001
From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com>
Date: Wed, 26 Feb 2025 14:13:41 +0100
Subject: [PATCH] ROB: Ignore non-numbers for width when building font width map

Closes #3153.
---
 pypdf/_cmap.py     |  4 ++++
 tests/test_cmap.py | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index f4b5e4b4c..d476cd571 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -423,6 +423,10 @@ def build_font_width_map(
                 # C_first C_last same_W
                 en = second
                 width = w[2].get_object()
+                if not isinstance(width, (int, float)):
+                    logger_warning(f"Expected numeric value for width, got {width}. Ignoring it.", __name__)
+                    w = w[3:]
+                    continue
                 for c_code in range(st, en + 1):
                     font_width_map[chr(c_code)] = width
                 w = w[3:]
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index 94f4c5a8b..2d9e4bbea 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -305,3 +305,15 @@ def test_standard_encoding(caplog):
     page = reader.pages[0]
     assert page.extract_text() == "Lorem ipsum"
     assert "Advanced encoding" not in caplog.text
+
+
+@pytest.mark.enable_socket
+def test_function_in_font_widths(caplog):
+    """Tests for #3153"""
+    url = "https://github.com/user-attachments/files/18945709/Marseille_pypdf_level_0.2._compressed.pdf"
+    name = "issue3153.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    page = reader.pages[455]
+    assert "La vulnérabilité correspond aux conséquences potentielles" in page.extract_text()
+    assert "Expected numeric value for width, got {'/Bounds': [0.25, 0.25]," in caplog.text