#qlm.cfg
#Local LLM prompt collection file $TPROMPTF, a temp file used for context ingestion and prompting:
TPROMPTF='/dev/shm/promf'
#Local LLM model directory (where you store all your LLMs, adjust as needed):
#This is used in the LLM functions
LLMDIR="$HOME/ML/Models"
#LLM response aliases (anon. functions) to provide one-shot conversations with the corresponding models.
#LLM inference functions gem, gem2, qwen, qwec, mist and deeq are defined in .zfunc, for autoloading:
alias reqwen='() { [[ -f /dev/shm/reqwen ]] && qwen "$(cat /dev/shm/reqwen)\n$1" || print "No previous qwen calls found!" ; }'
alias reqwec='() { [[ -f /dev/shm/reqwec ]] && qwec "$(cat /dev/shm/reqwec)\n$1" || print "No previous qwec calls found!" ; }'
alias remist='() { [[ -f /dev/shm/remist ]] && mist "$(cat /dev/shm/remist)\n$1" || print "No previous mist calls found!" ; }'
alias redeeq='() { [[ -f /dev/shm/redeeq ]] && deeq "$(cat /dev/shm/redeeq)\n$1" || print "No previous deeq calls found!" ; }'
alias regem='() { [[ -f /dev/shm/regem ]] && gem "$(cat /dev/shm/regem)\n$1" || print "No previous gem calls found!" ; }'
alias regem2='() { [[ -f /dev/shm/regem2 ]] && gem2 "$(cat /dev/shm/regem2)\n$1" || print "No previous gem2 calls found!" ; }'
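#Example follow-up flow (assumes the qwen function from .zfunc saves its last exchange to /dev/shm/reqwen):
# qwen "What does typeset -gA do in zsh?"
# reqwen "Show a short example of that."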
#Anon. function to populate LLM prompt file $TPROMPTF:
alias promf='() { (( $# )) && { [[ -f $1 ]] && cat "$1" >> $TPROMPTF || { (( $#1 - 1 )) && echo -e $1 >> $TPROMPTF || rm $TPROMPTF ; } ; } || echo -e "$(xsel -op)\n" >> $TPROMPTF ; }'
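#promf usage, derived from the alias above:
# promf notes.txt      -> appends the file's contents to $TPROMPTF
# promf "Some text"    -> appends the literal text (echo -e, so \n escapes are honoured)
# promf                -> appends the current X primary selection (xsel -op)
# promf x              -> any single-character argument deletes $TPROMPTF, i.e. resets the prompt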
#Here we define the large language models and their parameters. When adding a model, update all arrays below with the corresponding data.
#Available model files (adjust to your setup) to choose from with the qlm function and its completion function _qlm:
#Declared as a global assoc. array to speed up completion; entries are "llmname" "llmfilename":
typeset -gA llmodels=(
DeepseekQwen-14B DeepSeek-R1-Distill-Qwen-14B-Q5_K_L.gguf
Gemma2-9B gemma-2-9b-it-Q6_K_L.gguf
Gemma2-2B gemma-2-2b-it-abliterated-Q6_K_L.gguf
Gemma2-27B gemma-2-27b-it-IQ4_XS.gguf
MistralSmall3-24B Mistral-Small-24B-Instruct-2501-IQ4_XS.gguf
QwenCoder-14B Qwen2.5-Coder-14B-Instruct-Q5_K_L.gguf
QwenCoder-32B Qwen2.5-Coder-32B-Instruct-Q5_K_L.gguf
Qwen-14B Qwen2.5-14B-Instruct-Q5_K_L.gguf
QQwQ-32B Qwen_QwQ-32B-Q5_K_L.gguf
Phi4-14B phi-4-Q5_K_L.gguf
Phi4-mini Phi-4-mini-instruct-Q6_K_L.gguf
Granite3.2-2B granite-3.2-2b-instruct-Q8_0.gguf
Llama3.1-8B Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf
Llama3.2-3B Llama-3.2-3B-Instruct-Q6_K_L.gguf
)
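#Example of registering a new (hypothetical) model; the same key must then be added to ctxsize, gpulayers, temps and llmprompts below:
# llmodels+=( MyModel-7B MyModel-7B-Instruct-Q5_K_L.gguf )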
#Desired context length in tokens (powers of 2); adjust depending on use, model size, VRAM size and gpulayers:
typeset -A ctxsize=(
DeepseekQwen-14B 4096
Gemma2-9B 8192
Gemma2-2B 8192
Gemma2-27B 4096
MistralSmall3-24B 8192
QwenCoder-14B 4096
QwenCoder-32B 8192
Qwen-14B 4096
QQwQ-32B 8192
Phi4-14B 4096
Phi4-mini 32768
Granite3.2-2B 4096
Llama3.1-8B 8192
Llama3.2-3B 8192
)
#Max context length of each model:
#DeepseekQwen-14B 131072
#Gemma2-9B 8192
#Gemma2-2B 8192
#Gemma2-27B 8192
#MistralSmall3-24B 32768
#QwenCoder-14B 131072
#QwenCoder-32B 131072
#Qwen-14B 131072
#QQwQ-32B 131072
#Phi4-14B 16384
#Phi4-mini 131072
#Granite3.2-2B 131072
#Llama3.1-8B 131072
#Llama3.2-3B 131072
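#Optional sanity check (a sketch, commented out): if the maximums above are mirrored in an
#assoc. array maxctx, this loop warns when a requested ctxsize exceeds a model's maximum:
# for m in ${(k)ctxsize}; do (( ${ctxsize[$m]} > ${maxctx[$m]:-999999} )) && print "ctxsize for $m exceeds its maximum context length" ; done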
#Layers to offload to the GPU, to be adjusted depending on model size, VRAM size and desired ctxsize:
typeset -A gpulayers=(
DeepseekQwen-14B 99
Gemma2-9B 99
Gemma2-2B 99
Gemma2-27B 30
MistralSmall3-24B 29
QwenCoder-14B 99
QwenCoder-32B 28
Qwen-14B 99
QQwQ-32B 28
Phi4-14B 99
Phi4-mini 99
Granite3.2-2B 99
Llama3.1-8B 99
Llama3.2-3B 99
)
#Preset temperature parameters for inference; they depend on the model and the task:
typeset -A temps=(
DeepseekQwen-14B 0.61
Gemma2-9B 0.61
Gemma2-2B 0.61
Gemma2-27B 0.61
MistralSmall3-24B 0.05
QwenCoder-14B 0.5
QwenCoder-32B 0.5
Qwen-14B 0.61
QQwQ-32B 0.5
Phi4-14B 0.61
Phi4-mini 0.61
Granite3.2-2B 0.61
Llama3.1-8B 0.61
Llama3.2-3B 0.61
)
#Model-dependent prompt templates with a placeholder (UIUIUI) for context (user prompt) ingestion:
typeset -A llmprompts=(
DeepseekQwen-14B "<|begin▁of▁sentence|>You are DeepSeekR1-Qwen14B. You are a helpful assistant.<|User|>UIUIUI<|Assistant|>"
Gemma2-9B "User:\nUIUIUI\nAssistant:"
Gemma2-2B "User:\nUIUIUI\nAssistant:"
Gemma2-27B "User:\nUIUIUI\nAssistant:"
MistralSmall3-24B "[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model created by Mistral AI. When you are not sure about some information, you say that you do not have the information.[/SYSTEM_PROMPT][INST]UIUIUI[/INST]"
QwenCoder-14B "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful coding assistant.<|im_end|>\n<|im_start|>user\nUIUIUI<|im_end|>\n<|im_start|>assistant\n"
QwenCoder-32B "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful coding assistant.<|im_end|>\n<|im_start|>user\nUIUIUI<|im_end|>\n<|im_start|>assistant\n"
Qwen-14B "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nUIUIUI<|im_end|>\n<|im_start|>assistant\n"
QQwQ-32B "<|im_start|>system\nYou are Qwen, a helpful coding assistant. Think step by step but only keep a minimum draft of each thinking step, with 5 words at most. Return the answer at the end of the response after a separator ####.<|im_end|>\n<|im_start|>user\nUIUIUI<|im_end|>\n<|im_start|>assistant\n"
Phi4-14B "<|im_start|>system<|im_sep|>You are Phi4, a helpful assistant.<|im_end|><|im_start|>user<|im_sep|>UIUIUI<|im_end|><|im_start|>assistant<|im_sep|>"
Phi4-mini "<|system|>You are Phi4-mini, a helpful assistant.<|end|><|user|>UIUIUI<|end|><|assistant|>"
Granite3.2-2B "<|start_of_role|>system<|end_of_role|>You are Granite, developed by IBM. You are a helpful AI assistant. \nRespond to every user query in a detailed way. You can share your thoughts and reasoning before responding. In the thought process, engage in a comprehensive, iterative cycle of analysis, summarization, exploration, reassessment and reflection. In the response, based on explorations and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process for each user query.<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>UIUIUI<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n"
Llama3.1-8B "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nCutting Knowledge Date: December 2023\nToday Date: $(date +'%d %b %Y')\nYou are Llama3.1, a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>UIUIUI<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
Llama3.2-3B "Today is $(date +'%d %b %Y')\nYou are Llama3.2, a helpful assistant.\nUIUIUI"
)
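#How the pieces above fit together, as a minimal sketch (the real qlm function lives in .zfunc
#and may differ; llama-cli and its flags are an assumption based on the .gguf model files):
# qlm-sketch () {
#   local model=$1 prompt=$2
#   llama-cli -m $LLMDIR/${llmodels[$model]} -c ${ctxsize[$model]} -ngl ${gpulayers[$model]} \
#     --temp ${temps[$model]} -p "${llmprompts[$model]//UIUIUI/$prompt}"
# }
# qlm-sketch Qwen-14B "$(<$TPROMPTF)"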