diff --git a/slurm/2_ApplicationSpecific/VASP-GPU-podman/README.md b/slurm/2_ApplicationSpecific/VASP-GPU-podman/README.md index 2e9c130..413aea7 100644 --- a/slurm/2_ApplicationSpecific/VASP-GPU-podman/README.md +++ b/slurm/2_ApplicationSpecific/VASP-GPU-podman/README.md @@ -6,7 +6,7 @@ This is how to build VASP for GPU use, with a sample Slurm script To build VASP with nvidia GPU support takes about 45 minutes on a debug node. -Get an interactive job on a debug node. +Start an interactive job on a debug node. This example allcates a GPU to the job, but all tests are curenlty commented due to issues with using nvidia GPUs with "podman build" @@ -14,14 +14,14 @@ due to issues with using nvidia GPUs with "podman build" salloc --partition=debug --qos=debug --nodes=1 --gpus-per-node=1 --cpus-per-task=1 --tasks-per-node=32 --exclusive ``` -Create a directory for the podman image tarballs e.g. +Create a directory for the podman OCI images e.g. ``` -mkdir -p /projects/academic/group/podman/images +mkdir -p /projects/academic/ccrgroup/oci_archive_dir ``` Create a directory for the podman scripts e.g. 
``` -mkdir -p /projects/academic/group/VASP/bin +mkdir -p /projects/academic/ccrgroup/VASP/bin ``` Download the files to the Slurm temporary directory @@ -33,7 +33,7 @@ wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecifi wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecific/VASP-GPU-podman/fix_nvhpc_.pc_files.bash wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp.bash wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp_shell.bash -wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_vasp_std.bash +wget https://github.com/tonykew/ccr-examples/raw/main/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_SLURM_vasp_std.bash chmod 755 build_vasp.bash fix_nvhpc_.pc_files.bash ``` @@ -51,26 +51,26 @@ Usage for the build script: ``` ./build_vasp.bash -usage: ./build_vasp.bash image_tarball_directory bin_directory +usage: ./build_vasp.bash oci_image_archive_directory bin_directory e.g. -./build_vasp.bash /projects/academic/mygroup/podman_images/ /projects/academic/mygroup/VASP/bin +./build_vasp.bash /projects/academic/ccrgroup/oci_archive_dir/ /projects/academic/ccrgroup/VASP/bin ``` ...so, using the two directories created earlier: ``` -./build_vasp.bash /projects/academic/group/podman/images /projects/academic/group/VASP/bin +./build_vasp.bash /projects/academic/ccrgroup/oci_archive_dir /projects/academic/ccrgroup/VASP/bin ``` ## How to use -There is a sample Slurm script in the scripts directory you +There is a sample SLURM script in the scripts directory you provided for the build. e.g. ``` -/projects/academic/group/VASP/bin/sample_vasp_std.bash +/projects/academic/ccrgroup/VASP/bin/sample_SLURM_vasp_std.bash ``` copy the script, and change the "data_dir=" line to the directory with @@ -78,7 +78,7 @@ your data files in (i.e. "INCAR" etc.) e.g. 
``` -data_dir="/projects/academic/group/username/data" +data_dir="/projects/academic/ccrgroup/username/data" ``` Make any other changes you might need to the #SBATCH options diff --git a/slurm/2_ApplicationSpecific/VASP-GPU-podman/build_vasp.bash b/slurm/2_ApplicationSpecific/VASP-GPU-podman/build_vasp.bash index e50884e..50f65c5 100755 --- a/slurm/2_ApplicationSpecific/VASP-GPU-podman/build_vasp.bash +++ b/slurm/2_ApplicationSpecific/VASP-GPU-podman/build_vasp.bash @@ -2,18 +2,18 @@ if [ "${1}" = "" ] || [ ! -d "${1}" ] || [ "${2}" = "" ] || [ ! -d "${2}" ] then - echo "usage: ${0} image_tarball_directory bin_directory" >&2 + echo "usage: ${0} oci_image_archive_directory bin_directory" >&2 echo "e.g." >&2 - echo "${0} /projects/academic/mygroup/podman_images/ /projects/academic/mygroup/VASP/bin" >&2 + echo "${0} /projects/academic/ccrgroup/podman_images/ /projects/academic/ccrgroup/VASP/bin" >&2 exit 1 fi -tarball_dir="${1}" +oci_archive_dir="${1}" bin_dir="${2}" -if [ "${tarball_dir}" = "" ] || [ ! -d "${tarball_dir}" ] +if [ "${oci_archive_dir}" = "" ] || [ ! 
-d "${oci_archive_dir}" ] then - echo "image_tarball_directory \"${tarball_dir}\" doesn't exist - bailing" >&2 + echo "image_oci_archive_directory \"${oci_archive_dir}\" doesn't exist - bailing" >&2 exit 1 fi @@ -66,10 +66,10 @@ then exit 1 fi -echo "Saving container tarball" +echo "Saving container OCI image" podman save \ --format=oci-archive \ - --output="${tarball_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" \ + --output="${oci_archive_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" \ "localhost/vasp-${vasp_version}-gpu-single-node" echo "copying the run scripts" @@ -78,7 +78,7 @@ then for prog_name in vasp_gam vasp_ncl vasp_std do sed -E -e "s|^vasp_version=.*|vasp_version=\"${vasp_version}\"|" \ - -e "s|^image_tarball_dir=.*|image_tarball_dir=\"${tarball_dir}\"|" \ + -e "s|^oci_archive_dir=.*|oci_archive_dir=\"${oci_archive_dir}\"|" \ -e "s|VASP_PROG_NAME|${prog_name}|" \ "run_vasp.bash" > "${bin_dir}/${prog_name}-${vasp_version}.bash" chmod 755 "${bin_dir}/${prog_name}-${vasp_version}.bash" @@ -90,7 +90,7 @@ fi if [ -f "run_vasp_shell.bash" ] then sed -E -e "s|^vasp_version=.*|vasp_version=\"${vasp_version}\"|" \ - -e "s|^image_tarball_dir=.*|image_tarball_dir=\"${tarball_dir}\"|" \ + -e "s|^oci_archive_dir=.*|oci_archive_dir=\"${oci_archive_dir}\"|" \ "run_vasp_shell.bash" > "${bin_dir}/run_vasp_shell-${vasp_version}.bash" chmod 755 "${bin_dir}/run_vasp_shell-${vasp_version}.bash" ln -sf "${bin_dir}/run_vasp_shell-${vasp_version}.bash" "${bin_dir}/run_vasp_shell" @@ -98,8 +98,14 @@ else echo "script \"run_vasp_shell.bash\" missing?" 
>&2 fi # Provide a sample Slurm script -if [ -f sample_vasp_std.bash ] +if [ -f sample_SLURM_vasp_std.bash ] then + default_acct="$(sacctmgr -rnp show User "$(id -un)" | awk -F'|' '{print $2}')" + sed -E -i -e "/#SBATCH[[:space:]]+--account=/s|\".*\"|\"${default_acct}\"|" \ + -e "/#SBATCH[[:space:]]+--chdir=/s|\".*\"|\"${base_dir}\"|" \ + -e "s|^bin_dir=.*|bin_dir=\"${bin_dir}\"|" \ + "sample_SLURM_vasp_std.bash" + # try to provide a reasonable data dir path grp_group="$(groups | sed 's/ /\n/g' | grep ^grp- | head -1 | sed 's/^grp-//')" if [ "${grp_group}" != "" ] then @@ -112,16 +118,13 @@ then base_dir="/projects/rpci/${grp_group}/$(id -un)/VASP" fi default_acct="$(sacctmgr -rnp show User "$(id -un)" | awk -F'|' '{print $2}')" - sed -E -i -e "/#SBATCH[[:space:]]+--account=/s|\".*\"|\"${default_acct}\"|" \ - -e "/#SBATCH[[:space:]]+--chdir=/s|\".*\"|\"${base_dir}\"|" \ - -e "s|^bin_dir=.*|bin_dir=\"${bin_dir}\"|" \ - -e "s|^data_dir=.*|data_dir=\"${base_dir}/data\"|" \ - "sample_vasp_std.bash" + sed -E -i -e "s|^data_dir=.*|data_dir=\"${base_dir}/data\"|" \ + "sample_SLURM_vasp_std.bash" fi fi - cp "sample_vasp_std.bash" "${bin_dir}/sample_vasp_std.bash" + cp "sample_SLURM_vasp_std.bash" "${bin_dir}/sample_SLURM_vasp_std.bash" else - echo "sample script \"sample_vasp_std.bash\" missing?" >&2 + echo "sample script \"sample_SLURM_vasp_std.bash\" missing?" 
>&2 fi echo @@ -144,5 +147,5 @@ echo " ${bin_dir}/run_vasp_shell" echo echo echo "Example Slurm script:" -echo " ${bin_dir}/sample_vasp_std.bash" +echo " ${bin_dir}/sample_SLURM_vasp_std.bash" echo diff --git a/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp.bash b/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp.bash index 8ae9704..3bd4088 100755 --- a/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp.bash +++ b/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp.bash @@ -2,14 +2,20 @@ ############################################################################## vasp_version="" -image_tarball_dir="" +oci_archive_dir="" ############################################################################## +if ! which podman > /dev/null 2>&1 +then + echo "podman not found - bailing" >&2 + exit 1 +fi + if ! podman image exists "localhost/vasp-${vasp_version}-gpu-single-node" then # load the image on this node echo "Loading the VASP image from the tarball - this takes some time..." - podman load --input "${image_tarball_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" + podman load --input "${oci_archive_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" fi # Run one MPI task per GPU (with one task per GPU) diff --git a/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp_shell.bash b/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp_shell.bash index 9d11efd..325a2fa 100755 --- a/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp_shell.bash +++ b/slurm/2_ApplicationSpecific/VASP-GPU-podman/run_vasp_shell.bash @@ -2,14 +2,20 @@ ############################################################################## vasp_version="" -image_tarball_dir="" +oci_archive_dir="" ############################################################################## +if ! which podman > /dev/null 2>&1 +then + echo "podman not found - bailing" >&2 + exit 1 +fi + if ! 
podman image exists "localhost/vasp-${vasp_version}-gpu-single-node" then # load the image on this node echo "Loading the VASP image from the tarball - this takes some time..." - podman load --input "${image_tarball_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" + podman load --input "${oci_archive_dir}/podman-image-vasp-${vasp_version}-gpu-single-node.tar" fi # Run one MPI task per GPU (with one task per GPU) diff --git a/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_vasp_std.bash b/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_SLURM_vasp_std.bash similarity index 82% rename from slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_vasp_std.bash rename to slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_SLURM_vasp_std.bash index a11e259..9b5f9c9 100755 --- a/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_vasp_std.bash +++ b/slurm/2_ApplicationSpecific/VASP-GPU-podman/sample_SLURM_vasp_std.bash @@ -1,7 +1,7 @@ #!/bin/bash -l # Slurm account -#SBATCH --account="ccradmin" +#SBATCH --account="ccrgroup" #SBATCH --clusters=ub-hpc #SBATCH --partition=general-compute --qos=general-compute @@ -10,16 +10,14 @@ #SBATCH --gpus-per-node=1 #SBATCH --exclusive #SBATCH --output=%j.out -# 30 minutes walltime +# 1 hour walltime #SBATCH --time=01:00:00 -# directory in which to write job output file [job_id].out: -#SBATCH --chdir="/projects/academic/tonykew/tonykew/VASP" +# Path to the vasp support scripts (you shouldn't need to change this) +bin_dir="/projects/academic/ccrgroup/VASP/bin" -############################################################################## -bin_dir="/vscratch/grp-tonykew/VASP/bin" -data_dir="/projects/academic/tonykew/tonykew/VASP/data" -############################################################################## +# Configure the VASP data directory here: +data_dir="/projects/academic/ccrgroup/ccruser/VASP/data" echo "-------------------------------------------------------------------------------" echo "Job info:" @@ -72,11 +70,6
@@ if [ "${SLURM_NTASKS_PER_GPU}" != "" ] then echo "Tasks per GPU: ${SLURM_NTASKS_PER_GPU}" fi -## Infiniband is not used, so the Slurm topology is irrelevant -#if [ "{SLURM_TOPOLOGY_ADDR}" != "" ] -#then -# echo "SLURM topology: ${SLURM_TOPOLOGY_ADDR}" -#fi echo echo "-------------------------------------------------------------------------------" echo