Skip to content

Commit 6caa04c

Browse files
committed
Complete removal of unused settings from loading model options
1 parent 1cc2945 commit 6caa04c

File tree

4 files changed

+3
-18
lines changed

4 files changed

+3
-18
lines changed

moxin-backend/src/backend_impls/chat_ui.rs

+3 -10
Original file line number | Diff line number | Diff line change
@@ -316,22 +316,15 @@ fn create_wasi(
316316
file: &DownloadedFile,
317317
load_model: &LoadModelOptions,
318318
) -> wasmedge_sdk::WasmEdgeResult<WasiModule> {
319-
let ctx_size = if load_model.n_ctx > 0 {
320-
Some(load_model.n_ctx.to_string())
321-
} else {
322-
None
323-
};
319+
let ctx_size = Some(format!("{}", file.context_size));
324320

325321
let n_gpu_layers = match load_model.gpu_layers {
326322
moxin_protocol::protocol::GPULayers::Specific(n) => Some(n.to_string()),
327323
moxin_protocol::protocol::GPULayers::Max => None,
328324
};
329325

330-
let batch_size = if load_model.n_batch > 0 {
331-
Some(load_model.n_batch.to_string())
332-
} else {
333-
None
334-
};
326+
// Set n_batch to a fixed value of 128.
327+
let batch_size = Some(format!("128"));
335328

336329
let mut prompt_template = load_model.prompt_template.clone();
337330
if prompt_template.is_none() && !file.prompt_template.is_empty() {

moxin-backend/src/backend_impls/mod.rs

-4
Original file line number | Diff line number | Diff line change
@@ -136,8 +136,6 @@ fn test_chat() {
136136
prompt_template: None,
137137
gpu_layers: moxin_protocol::protocol::GPULayers::Max,
138138
use_mlock: false,
139-
n_batch: 512,
140-
n_ctx: 512,
141139
rope_freq_scale: 0.0,
142140
rope_freq_base: 0.0,
143141
context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit,
@@ -211,8 +209,6 @@ fn test_chat_stop() {
211209
prompt_template: None,
212210
gpu_layers: moxin_protocol::protocol::GPULayers::Max,
213211
use_mlock: false,
214-
n_batch: 512,
215-
n_ctx: 512,
216212
rope_freq_scale: 0.0,
217213
rope_freq_base: 0.0,
218214
context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit,

moxin-protocol/src/protocol.rs

-2
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,6 @@ pub struct LoadModelOptions {
2828
pub prompt_template: Option<String>,
2929
pub gpu_layers: GPULayers,
3030
pub use_mlock: bool,
31-
pub n_batch: u32,
32-
pub n_ctx: u32,
3331
pub rope_freq_scale: f32,
3432
pub rope_freq_base: f32,
3533

src/data/chats/mod.rs

-2
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,6 @@ impl Chats {
5959
prompt_template: None,
6060
gpu_layers: moxin_protocol::protocol::GPULayers::Max,
6161
use_mlock: false,
62-
n_batch: 512,
63-
n_ctx: 512,
6462
rope_freq_scale: 0.0,
6563
rope_freq_base: 0.0,
6664
context_overflow_policy:

0 commit comments

Comments
 (0)