Commit 6caa04c (1 parent: 1cc2945)
File tree: 4 files changed, +3 -18 lines changed
moxin-backend/src/backend_impls
4 files changed +3
-18
lines changed Original file line number Diff line number Diff line change @@ -316,22 +316,15 @@ fn create_wasi(
316
316
file : & DownloadedFile ,
317
317
load_model : & LoadModelOptions ,
318
318
) -> wasmedge_sdk:: WasmEdgeResult < WasiModule > {
319
- let ctx_size = if load_model. n_ctx > 0 {
320
- Some ( load_model. n_ctx . to_string ( ) )
321
- } else {
322
- None
323
- } ;
319
+ let ctx_size = Some ( format ! ( "{}" , file. context_size) ) ;
324
320
325
321
let n_gpu_layers = match load_model. gpu_layers {
326
322
moxin_protocol:: protocol:: GPULayers :: Specific ( n) => Some ( n. to_string ( ) ) ,
327
323
moxin_protocol:: protocol:: GPULayers :: Max => None ,
328
324
} ;
329
325
330
- let batch_size = if load_model. n_batch > 0 {
331
- Some ( load_model. n_batch . to_string ( ) )
332
- } else {
333
- None
334
- } ;
326
+ // Set n_batch to a fixed value of 128.
327
+ let batch_size = Some ( format ! ( "128" ) ) ;
335
328
336
329
let mut prompt_template = load_model. prompt_template . clone ( ) ;
337
330
if prompt_template. is_none ( ) && !file. prompt_template . is_empty ( ) {
Original file line number Diff line number Diff line change @@ -136,8 +136,6 @@ fn test_chat() {
136
136
prompt_template : None ,
137
137
gpu_layers : moxin_protocol:: protocol:: GPULayers :: Max ,
138
138
use_mlock : false ,
139
- n_batch : 512 ,
140
- n_ctx : 512 ,
141
139
rope_freq_scale : 0.0 ,
142
140
rope_freq_base : 0.0 ,
143
141
context_overflow_policy : moxin_protocol:: protocol:: ContextOverflowPolicy :: StopAtLimit ,
@@ -211,8 +209,6 @@ fn test_chat_stop() {
211
209
prompt_template : None ,
212
210
gpu_layers : moxin_protocol:: protocol:: GPULayers :: Max ,
213
211
use_mlock : false ,
214
- n_batch : 512 ,
215
- n_ctx : 512 ,
216
212
rope_freq_scale : 0.0 ,
217
213
rope_freq_base : 0.0 ,
218
214
context_overflow_policy : moxin_protocol:: protocol:: ContextOverflowPolicy :: StopAtLimit ,
Original file line number Diff line number Diff line change @@ -28,8 +28,6 @@ pub struct LoadModelOptions {
28
28
pub prompt_template : Option < String > ,
29
29
pub gpu_layers : GPULayers ,
30
30
pub use_mlock : bool ,
31
- pub n_batch : u32 ,
32
- pub n_ctx : u32 ,
33
31
pub rope_freq_scale : f32 ,
34
32
pub rope_freq_base : f32 ,
35
33
Original file line number Diff line number Diff line change @@ -59,8 +59,6 @@ impl Chats {
59
59
prompt_template : None ,
60
60
gpu_layers : moxin_protocol:: protocol:: GPULayers :: Max ,
61
61
use_mlock : false ,
62
- n_batch : 512 ,
63
- n_ctx : 512 ,
64
62
rope_freq_scale : 0.0 ,
65
63
rope_freq_base : 0.0 ,
66
64
context_overflow_policy :
You can’t perform that action at this time.
0 commit comments