diff --git a/llumnix/entrypoints/vllm/serve.py b/llumnix/entrypoints/vllm/serve.py index 25ba4cd9..702a5b89 100644 --- a/llumnix/entrypoints/vllm/serve.py +++ b/llumnix/entrypoints/vllm/serve.py @@ -26,7 +26,7 @@ entrypoints_args, manager_args, instance_args, engine_args = get_args(cfg, LaunchMode.GLOBAL, parser, cli_args) backend_type = BackendType.VLLM if not instance_args.simulator_mode else BackendType.SIM_VLLM - launch_args = LaunchArgs(launch_mode=LaunchMode.GLOBAL, backend_type=BackendType.VLLM) + launch_args = LaunchArgs(launch_mode=LaunchMode.GLOBAL, backend_type=backend_type) # Assume that there is an existing ray cluster when using centralized deployment. connect_to_ray_cluster()