# Patch summary (explanatory preamble; everything from the "diff --git" header
# onward is the unchanged patch text).
#
# Target: _posts/2022-10-02-vm_setup_for_deep_learning.md (05a8eb4 -> 01caa8b).
#
# Hunk 1 (-10,23 +10,26): drops the "Low-level stuff" heading, the
#   `{% highlight bash %}` opener and the `lspci` check; opens a ```bash fence;
#   introduces UBUNTU_VERSION, ARCH, DRIVER_VERSION, CUDA_VERSION and
#   CUDDN_VERSION; adds `-y` to the purge/remove/autoremove/autoclean commands.
# Hunk 2 (-44,89 +47,56): folds the separate "NVIDIA drivers", "CUDA" and
#   "cuDNN" sections into the same fenced script; driver/CUDA package names and
#   repository URLs are built from the variables above; the two `echo ... >>
#   ~/.bashrc` PATH lines are replaced by a multi-line block (guarded by
#   `[ -d "/usr/local/cuda/bin/" ]`) appended to ~/.bashrc; cuDNN is fetched
#   with wget and installed from its local-repo .deb; the GPU section's
#   "Python and Torch" pip instructions are removed; a final `sudo reboot` is
#   added; the non-GPU section's opener becomes a ```bash fence.
# Hunk 3 (-184,4 +154,4): replaces the closing `{% endhighlight %}` with ```.
#
# NOTE(review): the line breaks of this patch have been collapsed; it needs its
#   original newlines restored before `git apply`/`patch` can consume it.
# NOTE(review): DRIVER_VERSION and CUDA_VERSION are computed with
#   `apt-cache search` at the top of the script, before the graphics-drivers
#   PPA and the CUDA repository are added further down (and after nothing has
#   run `apt-get update`). Presumably they can come out empty or stale on a
#   fresh machine -- confirm, and consider resolving them after the repos are
#   configured.
# NOTE(review): `CUDDN_VERSION` is spelled with a double "D" everywhere it is
#   used; it works as written but presumably was meant to be CUDNN_VERSION.
# NOTE(review): the appended ~/.bashrc block puts `/opt/cuda/include` and
#   `/usr/local/cuda/targets/x86_64-linux/include` on LD_LIBRARY_PATH; these
#   look like header directories rather than library directories -- confirm.
# NOTE(review): the block is appended to ~/.bashrc unconditionally (the patch's
#   own TODO says as much), so re-running the script duplicates it.
# NOTE(review): `sudo add-apt-repository ppa:graphics-drivers/ppa` has no `-y`
#   and `nvidia*` / `nvidia-*` are unquoted; the former may prompt and the
#   latter may be expanded by the shell against the current directory --
#   verify before using the script unattended. `apt-key` is, to my knowledge,
#   deprecated on Ubuntu 22.04 -- confirm against apt-key(8).
diff --git a/_posts/2022-10-02-vm_setup_for_deep_learning.md b/_posts/2022-10-02-vm_setup_for_deep_learning.md index 05a8eb4..01caa8b 100644 --- a/_posts/2022-10-02-vm_setup_for_deep_learning.md +++ b/_posts/2022-10-02-vm_setup_for_deep_learning.md @@ -10,23 +10,26 @@ tags: [software, deep learning, ubuntu, pytorch, setup] # GPU-machine, Ubuntu 22.04 -### Low-level stuff - -{% highlight bash %} -# Check GPU is CUDA-compatible -lspci | grep -i nvidia - -# Remove any previous installations -sudo apt-get purge nvidia* -sudo apt remove nvidia-* +```bash +# Get latest version strings +UBUNTU_VERSION=ubuntu2204 +ARCH=amd64 +DRIVER_VERSION=$(apt-cache search nvidia-driver- | grep "^nvidia-driver" | grep -v open | sort | tail -n 1 | sed "s/ .*//g" | sed -r 's/.*-([0-9]+)/\1/g') +CUDA_VERSION=$(apt-cache search cuda- | grep "^cuda-[0-9]" | sort | tail -n 1 | sed "s/ .*//g" | sed -r 's/.*-([0-9]+-[0-9])/\1/g') +CUDDN_VERSION=9.0.0 + +# Clean up old installs +sudo apt-get -y purge nvidia* +sudo apt-get -y remove nvidia-* sudo rm /etc/apt/sources.list.d/cuda* -sudo apt-get autoremove && sudo apt-get autoclean +sudo apt-get -y autoremove && sudo apt-get -y autoclean sudo rm -rf /usr/local/cuda* -# Install +# Update sudo apt-get update sudo apt-get upgrade -y +# Install base libs sudo apt-get install -y \ build-essential \ libatlas-base-dev \ @@ -44,89 +47,56 @@ sudo apt-get install -y \ libx11-dev \ libxmu-dev \ libxi-dev -{% endhighlight %} - -### NVIDIA drivers -{% highlight bash %} +# Install NVIDIA drivers sudo add-apt-repository ppa:graphics-drivers/ppa -sudo apt update -sudo apt install libnvidia-common-470 -sudo apt install libnvidia-gl-470 -sudo apt install nvidia-driver-470 -sudo apt install nvidia-settings -sudo apt install nvidia-utils-470 -{% endhighlight %} - -### CUDA - -{% highlight bash %} - -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - -sudo mv cuda-ubuntu2004.pin 
/etc/apt/preferences.d/cuda-repository-pin-600 - -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - -sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" sudo apt-get update -sudo apt install cuda-11-3 - - -# setup your paths -echo 'export PATH=/usr/local/cuda-11.3/bin:$PATH' >> ~/.bashrc -echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc +sudo apt-get install -y libnvidia-common-${DRIVER_VERSION} +sudo apt-get install -y libnvidia-gl-${DRIVER_VERSION} +sudo apt-get install -y nvidia-driver-${DRIVER_VERSION} +sudo apt-get install -y nvidia-settings +sudo apt-get install -y nvidia-utils-${DRIVER_VERSION} + +# Install CUDA +wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/cuda-${UBUNTU_VERSION}.pin +sudo mv cuda-${UBUNTU_VERSION}.pin /etc/apt/preferences.d/cuda-repository-pin-600 +sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/3bf863cc.pub +sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/ /" +sudo apt-get update +sudo apt-get install -y cuda-${CUDA_VERSION} + +# setup CUDA paths (TODO: test if this has already been added) +__export=' +# CUDA +if [ -d "/usr/local/cuda/bin/" ]; then + export PATH=/usr/local/cuda/bin${PATH:+:${PATH}} + export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export LD_LIBRARY_PATH=/opt/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export LD_LIBRARY_PATH=/opt/cuda/include${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/include${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export CUDA_HOME=/usr/local/cuda +fi +' +echo "$__export" >> ~/.bashrc 
source ~/.bashrc sudo ldconfig -{% endhighlight %} -## cuDNN - -{% highlight bash %} -# Download .deb for CUDA 11.3 from https://developer.nvidia.com/rdp/cudnn-download, and copy to VM via scp. Then: -# wget https://developer.nvidia.com/compute/cudnn/secure/8.5.0/local_installers/11.7/cudnn-local-repo-ubuntu2204-8.5.0.96_1.0-1_amd64.deb -sudo dpkg - i cudnn-local-repo-ubuntu2204-8.5.0.96_1.0-1_amd64.deb -{% endhighlight %} - -## Python and Torch - -{% highlight bash %} -# Make sure the `python` command is by default Python3 (it will be 3.10 for Ubuntu 22.04) -sudo apt install python-is-python3 - -# Install pip -wget https://bootstrap.pypa.io/get-pip.py -python get-pip.py - -python -m pip install wheel setuptools +# Install CuDNN: https://developer.nvidia.com/rdp/cudnn-download +wget https://developer.download.nvidia.com/compute/cudnn/${CUDDN_VERSION}/local_installers/cudnn-local-repo-${UBUNTU_VERSION}-${CUDDN_VERSION}_1.0-1_${ARCH}.deb +sudo dpkg -i cudnn-local-repo-${UBUNTU_VERSION}-${CUDDN_VERSION}_1.0-1_${ARCH}.deb +sudo cp /var/cudnn-local-repo-${UBUNTU_VERSION}-${CUDDN_VERSION}/cudnn-*-keyring.gpg /usr/share/keyrings/ +sudo apt-get update +sudo apt-get -y install cudnn -# Install Python scientific stack -python -m pip install \ - IPython \ - urlpath \ - tqdm \ - numpy \ - scipy \ - pandas \ - matplotlib \ - seaborn \ - anndata \ - scanpy \ - squidpy \ - statsmodels \ - scikit-learn \ - scikit-image \ - networkx \ - torch \ - torchvision \ - IPython \ - --extra-index-url https://download.pytorch.org/whl/cu113 -{% endhighlight %} +# Reboot! +sudo reboot +``` # Non-GPU machine, Ubuntu 22.04 -{% highlight bash %} +```bash # Make sure VM is up-to-date sudo apt-get update sudo apt-get upgrade -y @@ -184,4 +154,4 @@ python -m pip install \ torchvision \ IPython \ --extra-index-url https://download.pytorch.org/whl/cu113 -{% endhighlight %} +```