Enable max token setting for llama.cpp
julianbollig committed Feb 27, 2025
1 parent 9815347 commit 44850df
Showing 3 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion LlamaCPP/llama_adapter.py
@@ -148,7 +148,7 @@ def text_conversation_run(
)

full_prompt = convert_prompt(prompt)
- stream = self.llm_interface.create_chat_completion(full_prompt)
+ stream = self.llm_interface.create_chat_completion(full_prompt, params.max_tokens)
self.stream_function(stream)

except Exception as ex:
3 changes: 2 additions & 1 deletion LlamaCPP/llama_cpp_backend.py
@@ -34,9 +34,10 @@ def load_model(self, params: LLMParams, n_gpu_layers: int = -1, context_length:
if callback is not None:
callback("finish")

- def create_chat_completion(self, messages: List[Dict[str, str]]):
+ def create_chat_completion(self, messages: List[Dict[str, str]], max_tokens: int = 1024):
completion: Iterator[CreateChatCompletionStreamResponse] = self._model.create_chat_completion(
messages=messages,
+ max_tokens=max_tokens,
stream=True,
)
return completion
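For reference, a minimal sketch of the underlying llama-cpp-python call that the updated create_chat_completion now forwards max_tokens to. The model path, prompt, and constructor arguments below are placeholders, not values from this repository; only the max_tokens/stream keywords correspond to the change in this commit.

```python
from llama_cpp import Llama

# Placeholder model path and settings; adjust to a local GGUF model.
llm = Llama(model_path="models/your-model.gguf", n_ctx=4096, n_gpu_layers=-1)

# Streaming chat completion capped at max_tokens, mirroring the default of 1024
# introduced in the new create_chat_completion signature.
stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Explain llama.cpp in one sentence."}],
    max_tokens=1024,
    stream=True,
)

# Each chunk follows the OpenAI-style streaming format: print the content deltas.
for chunk in stream:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
```

With this plumbing in place, the value chosen in the WebUI's Max Tokens slider (see the Vue change below) is what ends up as max_tokens here instead of the library default.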
2 changes: 1 addition & 1 deletion WebUI/src/components/SettingsBasic.vue
@@ -105,7 +105,7 @@
></button>
</div>
</div>
- <div v-if="textInference.backend !== 'llamaCPP'" class="flex flex-col gap-2">
+ <div class="flex flex-col gap-2">
<p>Max Tokens</p>
<slide-bar
v-model:current="textInference.maxTokens"
