From 5d52c1bb9447a2866d9af17ad8bde0a998cdf964 Mon Sep 17 00:00:00 2001 From: "Rishi C." <77904151+rishic3@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:39:27 -0800 Subject: [PATCH] Fix PyTriton library discovery (#505) Detect and configure required library paths (i.e., to Triton bundled libraries and libpython3.*.so), when starting Triton process. Avoids the need to set executorEnv globally and works across conda/non-conda envs (e.g. Dataproc vs. Databricks). Minor cleanups to PyTriton server shutdown. --------- Signed-off-by: Rishi Chandra --- .../conditional_generation_tf.ipynb | 13 ----- .../conditional_generation_torch.ipynb | 5 -- .../huggingface/deepseek-r1_torch.ipynb | 5 -- .../huggingface/gemma-7b_torch.ipynb | 5 -- .../huggingface/pipelines_tf.ipynb | 13 ----- .../huggingface/pipelines_torch.ipynb | 5 -- .../huggingface/qwen-2.5-7b_torch.ipynb | 5 -- .../sentence_transformers_torch.ipynb | 5 -- .../pytorch/housing_regression_torch.ipynb | 5 -- .../pytorch/image_classification_torch.ipynb | 5 -- .../Spark-DL/dl_inference/pytriton_utils.py | 53 ++++++++++++++++--- .../tensorflow/image_classification_tf.ipynb | 13 ----- .../tensorflow/keras_preprocessing_tf.ipynb | 13 ----- .../tensorflow/keras_resnet50_tf.ipynb | 13 ----- .../tensorflow/text_classification_tf.ipynb | 13 ----- 15 files changed, 45 insertions(+), 126 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb index 7a8316c2..1614b3d8 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb @@ -275,20 +275,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb index e7dd198b..81e73109 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb @@ -215,12 +215,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", " conf.set(\"spark.executor.cores\", \"8\")\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/deepseek-r1_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/deepseek-r1_torch.ipynb index d59a6b32..a1ec5533 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/deepseek-r1_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/deepseek-r1_torch.ipynb @@ -305,12 +305,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", " conf.set(\"spark.executorEnv.HF_HOME\", hf_home)\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/gemma-7b_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/gemma-7b_torch.ipynb index 63cbd1a1..3d03a4d5 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/gemma-7b_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/gemma-7b_torch.ipynb @@ -244,12 +244,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", " conf.set(\"spark.executorEnv.HF_HOME\", hf_home)\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb index 1d3fe6ff..cedaece5 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb @@ -340,20 +340,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb index 476168bc..c8a24976 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb @@ -254,12 +254,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", " conf.set(\"spark.executor.cores\", \"8\")\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/qwen-2.5-7b_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/qwen-2.5-7b_torch.ipynb index faf5c51d..1d6405bb 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/qwen-2.5-7b_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/qwen-2.5-7b_torch.ipynb @@ -307,12 +307,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", " conf.set(\"spark.executorEnv.HF_HOME\", hf_home)\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb index 9e8d6d48..c402c8ab 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb @@ -161,12 +161,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", " conf.set(\"spark.executor.cores\", \"8\")\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/housing_regression_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/housing_regression_torch.ipynb index ca1cc8de..f5b9f6e3 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/housing_regression_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/housing_regression_torch.ipynb @@ -894,12 +894,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", " conf.set(\"spark.executor.cores\", \"8\")\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb index 3a85020a..0cbb2bd9 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb @@ -862,12 +862,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\") \n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", " conf.set(\"spark.executor.cores\", \"8\")\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytriton_utils.py b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytriton_utils.py index 1a568169..01e89234 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytriton_utils.py +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytriton_utils.py @@ -18,6 +18,7 @@ import os import signal import socket +import sys import time from multiprocessing import Process from typing import Callable, Dict, List, Optional, Tuple @@ -42,8 +43,31 @@ def _start_triton_server( model_path: Optional[str] = None, ) -> List[tuple]: """Task to start Triton server process on a Spark executor.""" - sig = inspect.signature(triton_server_fn) - params = sig.parameters + + def _prepare_pytriton_env(): + """Expose PyTriton to correct libpython3.11.so and Triton bundled libraries.""" + ld_library_paths = [] + + # Add nvidia_pytriton.libs to LD_LIBRARY_PATH + for path in sys.path: + if os.path.isdir(path) and "site-packages" in path: + libs_path = os.path.join(path, "nvidia_pytriton.libs") + if os.path.isdir(libs_path): + ld_library_paths.append(libs_path) + break + + # Add ${CONDA_PREFIX}/lib to LD_LIBRARY_PATH for conda environments + if os.path.exists(os.path.join(sys.prefix, "conda-meta")): + conda_lib = os.path.join(sys.prefix, "lib") + if os.path.isdir(conda_lib): + ld_library_paths.append(conda_lib) + + if "LD_LIBRARY_PATH" in os.environ: + ld_library_paths.append(os.environ["LD_LIBRARY_PATH"]) + + os.environ["LD_LIBRARY_PATH"] = ":".join(ld_library_paths) + + return None def _find_ports(start_port: int = 7000) -> List[int]: """Find available ports for Triton's HTTP, gRPC, and metrics services.""" @@ -59,6 +83,8 @@ def _find_ports(start_port: int = 7000) -> List[int]: return ports ports = _find_ports() + sig = inspect.signature(triton_server_fn) + params = sig.parameters if model_path is not None: assert ( @@ -69,6 +95,7 @@ def _find_ports(start_port: int = 7000) -> List[int]: assert len(params) == 1, "Server function must accept (ports) argument" args = (ports,) + _prepare_pytriton_env() hostname = socket.gethostname() process = Process(target=triton_server_fn, args=args) process.start() @@ -83,6 +110,11 @@ def _find_ports(start_port: int = 7000) -> List[int]: except Exception: pass + client.close() + if process.is_alive(): + # Terminate if timeout is exceeded to avoid dangling server processes + process.terminate() + raise TimeoutError( "Failure: server startup timeout exceeded. Check the executor logs for more info." ) @@ -98,14 +130,19 @@ def _stop_triton_server( pid, _ = server_pids_ports.get(hostname) assert pid is not None, f"No server PID found for host {hostname}" - for _ in range(wait_retries): + try: + process = psutil.Process(pid) + process.terminate() + process.wait(timeout=wait_timeout * wait_retries) + return [True] + except psutil.NoSuchProcess: + return [True] + except psutil.TimeoutExpired: try: - os.kill(pid, signal.SIGTERM) - except OSError: + process.kill() return [True] - time.sleep(wait_timeout) - - return [False] # Failed to terminate or timed out + except: + return [False] class TritonServerManager: diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb index 3fcdf3a5..621ed2e5 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb @@ -815,20 +815,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_preprocessing_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_preprocessing_tf.ipynb index 5ded9d07..2b181b26 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_preprocessing_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_preprocessing_tf.ipynb @@ -991,20 +991,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_resnet50_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_resnet50_tf.ipynb index 97eb600c..6a47b991 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_resnet50_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras_resnet50_tf.ipynb @@ -189,20 +189,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\") \n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb index a5dc4d09..1c467581 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb @@ -1210,20 +1210,7 @@ " conf.setMaster(f\"spark://{hostname}:7077\")\n", " conf.set(\"spark.pyspark.python\", f\"{conda_env}/bin/python\")\n", " conf.set(\"spark.pyspark.driver.python\", f\"{conda_env}/bin/python\")\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_env}/lib:{conda_env}/lib/python3.11/site-packages/nvidia_pytriton.libs:$LD_LIBRARY_PATH\")\n", - " source = \"/usr/lib/x86_64-linux-gnu/libstdc++.so.6\"\n", - " target = f\"{conda_env}/lib/libstdc++.so.6\"\n", - " try:\n", - " if os.path.islink(target) or os.path.exists(target):\n", - " os.remove(target)\n", - " os.symlink(source, target)\n", - " except OSError as e:\n", - " print(f\"Error creating symlink: {e}\")\n", " elif on_dataproc:\n", - " # Point PyTriton to correct libpython3.11.so:\n", - " conda_lib_path=\"/opt/conda/miniconda3/lib\"\n", - " conf.set(\"spark.executorEnv.LD_LIBRARY_PATH\", f\"{conda_lib_path}:$LD_LIBRARY_PATH\")\n", " conf.set(\"spark.executorEnv.TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")\n", " conf.set(\"spark.executor.instances\", \"4\") # dataproc defaults to 2\n", "\n",