From 506153b7ef2055e8829f286366a93b7e25453a61 Mon Sep 17 00:00:00 2001 From: Rishi Chandra Date: Tue, 25 Feb 2025 09:19:13 -0800 Subject: [PATCH] Fix CSP instructions --- .../dl_inference/databricks/README.md | 20 +++++++++++-------- .../databricks/setup/init_spark_dl.sh | 2 -- .../databricks/setup/start_cluster.sh | 2 +- .../Spark-DL/dl_inference/dataproc/README.md | 5 +++-- .../dataproc/setup/start_cluster.sh | 1 - 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/README.md b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/README.md index cc760522..26edfb85 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/README.md +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/README.md @@ -1,6 +1,7 @@ # Spark DL Inference on Databricks -**Note**: fields in \ require user inputs. +**Note**: fields in \ require user inputs. +Make sure you are in [this](./) directory. ## Setup @@ -9,16 +10,19 @@ 2. Specify the path to your Databricks workspace: ```shell export WS_PATH= + ``` - export NOTEBOOK_DEST=${WS_PATH}/spark-dl/notebook_torch.ipynb - export UTILS_DEST=${WS_PATH}/spark-dl/pytriton_utils.py - export INIT_DEST=${WS_PATH}/spark-dl/init_spark_dl.sh + ```shell + export SPARK_DL_WS=${WS_PATH}/spark-dl + databricks workspace mkdirs ${SPARK_DL_WS} ``` 3. Specify the local paths to the notebook you wish to run, the utils file, and the init script. As an example for a PyTorch notebook: ```shell export NOTEBOOK_SRC= - export UTILS_SRC= + ``` + ```shell + export UTILS_SRC=$(realpath ../pytriton_utils.py) export INIT_SRC=$(pwd)/setup/init_spark_dl.sh ``` 4. Specify the framework to torch or tf, corresponding to the notebook you wish to run. Continuing with the PyTorch example: @@ -29,9 +33,9 @@ 5. 
Copy the files to the Databricks Workspace: ```shell - databricks workspace import $NOTEBOOK_DEST --format JUPYTER --file $NOTEBOOK_SRC - databricks workspace import $UTILS_DEST --format AUTO --file $UTILS_SRC - databricks workspace import $INIT_DEST --format AUTO --file $INIT_SRC + databricks workspace import ${SPARK_DL_WS}/notebook_torch.ipynb --format JUPYTER --file $NOTEBOOK_SRC + databricks workspace import ${SPARK_DL_WS}/pytriton_utils.py --format AUTO --file $UTILS_SRC + databricks workspace import ${SPARK_DL_WS}/init_spark_dl.sh --format AUTO --file $INIT_SRC ``` 6. Launch the cluster with the provided script. By default the script will create a cluster with 4 A10 worker nodes and 1 A10 driver node. (Note that the script uses **Azure instances** by default; change as needed). diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/init_spark_dl.sh b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/init_spark_dl.sh index 9515f435..e8e60c93 100755 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/init_spark_dl.sh +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/init_spark_dl.sh @@ -10,7 +10,6 @@ if [[ "${FRAMEWORK}" == "torch" ]]; then cat < temp_requirements.txt datasets==3.* transformers -urllib3<2 nvidia-pytriton torch<=2.5.1 torchvision --extra-index-url https://download.pytorch.org/whl/cu121 @@ -24,7 +23,6 @@ elif [[ "${FRAMEWORK}" == "tf" ]]; then cat < temp_requirements.txt datasets==3.* transformers -urllib3<2 nvidia-pytriton EOF else diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/start_cluster.sh b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/start_cluster.sh index 457b080b..7b37efc4 100755 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/start_cluster.sh +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/databricks/setup/start_cluster.sh @@ -26,7 +26,7 @@ json_config=$(cat < require user inputs. 
+**Note**: fields in \<brackets\> require user inputs. +Make sure you are in [this](./) directory. #### Setup GCloud CLI @@ -41,7 +42,7 @@ 5. Copy the utils file to the GCS bucket. ```shell - gcloud storage cp gs://${SPARK_DL_HOME}/ + gcloud storage cp $(realpath ../pytriton_utils.py) gs://${SPARK_DL_HOME}/ ``` #### Start cluster and run diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/dataproc/setup/start_cluster.sh b/examples/ML+DL-Examples/Spark-DL/dl_inference/dataproc/setup/start_cluster.sh index 35840bae..649a4805 100755 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/dataproc/setup/start_cluster.sh +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/dataproc/setup/start_cluster.sh @@ -45,7 +45,6 @@ scikit-learn huggingface datasets==3.* transformers -urllib3<2 nvidia-pytriton" TORCH_REQUIREMENTS="${COMMON_REQUIREMENTS}