Commit: llama.cpp
Mateusz-Dera committed Dec 29, 2024
1 parent 1e84800 commit afac1bd
Showing 2 changed files with 37 additions and 4 deletions.
README.md (3 changes: 2 additions & 1 deletion)
@@ -1,7 +1,7 @@
# ROCm-AI-Installer
A script that automatically installs all the required stuff to run selected AI interfaces on AMD Radeon 7900XTX.
It should also work on 7900XT cards.
-For other cards, change HSA_OVERRIDE_GFX_VERSION at the beginning of the script (Not tested).
+For other cards, change HSA_OVERRIDE_GFX_VERSION and GFX at the beginning of the script (Not tested).
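
As a hypothetical illustration (these values are an assumption, not part of this commit), an RDNA2 card such as the RX 6800 XT or 6900 XT would typically use the gfx1030 target:

```bash
# Assumed overrides for an RX 6800/6900 XT (gfx1030); untested, adjust for your GPU.
export HSA_OVERRIDE_GFX_VERSION=10.3.0
export GFX=gfx1030
```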

## Info
[![Version](https://img.shields.io/badge/6.1-version-orange.svg)](https://github.com/Mateusz-Dera/ROCm-AI-Installer/blob/main/README.md)
@@ -26,6 +26,7 @@ For other cards, change HSA_OVERRIDE_GFX_VERSION at the beginning of the script
|KoboldCPP|Python 3.12 venv|https://github.com/YellowRoseCx/koboldcpp-rocm|GGML and GGUF models.|
|Text generation web UI|Python 3.12 venv|https://github.com/oobabooga/text-generation-webui<br/> https://github.com/ROCm/bitsandbytes.git<br/> https://github.com/ROCmSoftwarePlatform/flash-attention<br/> https://github.com/turboderp/exllamav2|ExLlamaV2 and Transformers models.|
|SillyTavern (1.12.9)|Node|https://github.com/SillyTavern/SillyTavern||
|llama.cpp|C++|https://github.com/ggerganov/llama.cpp|1. Put model.gguf into llama.cpp folder.<br> 2. Change context size in run.sh file (Default: 32768).<br> 3. Set GPU offload layers in run.sh file (Default: 1)|
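
The three numbered steps in the llama.cpp row above can be sketched as shell commands. This is a minimal sketch; the installation path, model location, and the new context/offload values are assumptions, not part of this commit:

```bash
# Assumed installation path and model file; adjust to your setup.
cd "$HOME/AI/llama.cpp"                               # 1. go into the llama.cpp folder
cp /path/to/your-model.gguf model.gguf                #    and place the model as model.gguf
sed -i 's/--ctx-size 32768/--ctx-size 16384/' run.sh  # 2. change the context size
sed -i 's/--gpu-layers 1/--gpu-layers 99/' run.sh     # 3. set the GPU offload layers
./run.sh
```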

### Image generation
|Name|Environment|Links|Additional information|
install.sh (38 changes: 35 additions & 3 deletions)
@@ -22,6 +22,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>

export HSA_OVERRIDE_GFX_VERSION=11.0.0
export GFX=gfx1100

# Version
version="6.1"
@@ -90,10 +91,11 @@ set_installation_path() {

# Text generation
text_generation() {
-whiptail --title "Text generation" --menu "Choose an option:" 15 100 3 \
-0 "KoboldCPP" \
+whiptail --title "Text generation" --menu "Choose an option:" 15 100 4 \
+0 "Install KoboldCPP" \
1 "Text generation web UI" \
2 "SillyTavern" \
+3 "Install llama.cpp" \
2>&1 > /dev/tty
}

@@ -433,7 +435,7 @@ install_koboldcpp() {

# Text generation web UI
install_text_generation_web_ui() {
-install "https://github.com/oobabooga/text-generation-webui.git" "cc8c7ed2093cbc747e7032420eae14b5b3c30311" "python server.py --api --listen --extensions sd_api_pictures send_pictures gallery"
+install "https://github.com/oobabooga/text-generation-webui.git" "4d466d5c80eb83892b7dfb76fa4ab69efd6d6989" "python server.py --api --listen --extensions sd_api_pictures send_pictures gallery"

# Additional requirements
pip install wheel==0.45.1 setuptools==75.6.0
@@ -463,6 +465,32 @@ install_sillytavern() {
sed -i 's/basicAuthMode: false/basicAuthMode: true/' config.yaml
}

# llama.cpp
install_llama_cpp() {
cd $installation_path
if [ -d "llama.cpp" ]
then
rm -rf llama.cpp
fi
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
git checkout f865ea149d71ef883e3780fced8a20a1464eccf4

HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$GFX -DCMAKE_BUILD_TYPE=Release \
&& cmake --build build --config Release -- -j 16

tee --append run.sh <<EOF
#!/bin/bash
export HSA_OVERRIDE_GFX_VERSION=$HSA_OVERRIDE_GFX_VERSION
export CUDA_VISIBLE_DEVICES=0
export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
export TORCH_BLAS_PREFER_HIPBLASLT=0
./build/bin/llama-server -m model.gguf --port 8080 --ctx-size 32768 --gpu-layers 1
EOF
chmod +x run.sh
}
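
Once run.sh is started, llama-server listens on port 8080. A minimal smoke test, assuming the OpenAI-compatible chat endpoint exposed by llama.cpp's server (the endpoint and payload here are illustrative, not part of this commit):

```bash
# Assumes llama-server is already running via ./run.sh on port 8080.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":32}'
```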

# ANIMAGINE XL 3.1
install_animagine_xl() {
install "https://huggingface.co/spaces/cagliostrolab/animagine-xl-3.1" "76b0dfc75bdc06e7bceeae96de3c09c8fa833008" "python app.py"
@@ -1034,6 +1062,10 @@ while true; do
esac
done
;;
3)
# llama.cpp
install_llama_cpp
;;
*)
first=false
;;
