From 8ea8523744138da981bf952f28a5eb304f9898c3 Mon Sep 17 00:00:00 2001 From: Huazhong Ji Date: Tue, 18 Feb 2025 14:19:38 +0800 Subject: [PATCH] reset default block_size from 16 to 128 (#84) ### What this PR does / why we need it? Change the default block_size in platform.py from 16 to 128, because Ascend devices have a better affinity for a block size of 128. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Signed-off-by: hzji210@gmail.com --- vllm_ascend/platform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 2b847de1..1f22e564 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -96,7 +96,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker" cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: - cache_config.block_size = 16 + cache_config.block_size = 128 @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype,