From d535c1dba2bae80fbf695cde296bef7e412f47ee Mon Sep 17 00:00:00 2001 From: zouyida Date: Tue, 25 Feb 2025 15:37:44 +0800 Subject: [PATCH 1/4] enable multimodal ut Signed-off-by: zouyida --- pytest.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index deccf10b..a174294e 100644 --- a/pytest.ini +++ b/pytest.ini @@ -16,7 +16,6 @@ norecursedirs = vllm-empty/tests/lora vllm-empty/tests/models vllm-empty/tests/mistral_tool_use - vllm-empty/tests/multimodal vllm-empty/tests/standalone_tests vllm-empty/tests/async_engine vllm-empty/tests/mq_llm_engine @@ -51,6 +50,9 @@ addopts = --ignore=vllm-empty/tests/test_utils.py --ignore=vllm-empty/tests/models/decoder_only/language/test_aqlm.py --ignore=vllm-empty/tests/models/decoder_only/language/test_gptq_marlin.py --ignore=vllm-empty/tests/models/decoder_only/language/test_gptq_marlin_24.py + --ignore=vllm-empty/tests/multimodal/test_processing.py + --ignore=vllm-empty/tests/multimodal/test_processor_kwargs.py + --ignore=vllm-empty/tests/multimodal/test_utils.py testpaths = vllm-empty/tests From dce979a1345439138dd2cd7a8f285efaab4f786e Mon Sep 17 00:00:00 2001 From: zouyida Date: Wed, 26 Feb 2025 09:35:05 +0800 Subject: [PATCH 2/4] bugfix for mrope Signed-off-by: zouyida --- vllm_ascend/model_runner.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py index d0aa06dc..2b0ddcda 100644 --- a/vllm_ascend/model_runner.py +++ b/vllm_ascend/model_runner.py @@ -721,14 +721,10 @@ def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, mrope_input_positions, mrope_position_delta = \ MRotaryEmbedding.get_input_positions( token_ids, + hf_config image_grid_thw=image_grid_thw, video_grid_thw=video_grid_thw, - image_token_id=hf_config.image_token_id, - video_token_id=hf_config.video_token_id, - vision_start_token_id=hf_config.vision_start_token_id, - vision_end_token_id=hf_config.vision_end_token_id, - spatial_merge_size=hf_config.vision_config. - spatial_merge_size, + second_per_grid_ts=None, context_len=inter_data.context_lens[seq_idx], seq_len=inter_data.seq_lens[seq_idx], ) From 97e1e1c098ab364e438403fe7055e4c70f39b3b6 Mon Sep 17 00:00:00 2001 From: zouyida Date: Wed, 26 Feb 2025 09:36:43 +0800 Subject: [PATCH 3/4] bugfix for mrope Signed-off-by: zouyida --- vllm_ascend/model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py index 2b0ddcda..bc84d2c3 100644 --- a/vllm_ascend/model_runner.py +++ b/vllm_ascend/model_runner.py @@ -721,7 +721,7 @@ def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, mrope_input_positions, mrope_position_delta = \ MRotaryEmbedding.get_input_positions( token_ids, - hf_config + hf_config, image_grid_thw=image_grid_thw, video_grid_thw=video_grid_thw, second_per_grid_ts=None, From 861fb55eeafa2791fd9ff67209e9ebff8064f029 Mon Sep 17 00:00:00 2001 From: zouyida Date: Wed, 26 Feb 2025 11:54:48 +0800 Subject: [PATCH 4/4] bugfix for mrope Signed-off-by: zouyida --- vllm_ascend/model_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py index bc84d2c3..d3c9ba23 100644 --- a/vllm_ascend/model_runner.py +++ b/vllm_ascend/model_runner.py @@ -709,6 +709,7 @@ def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, assert image_grid_thw is not None or video_grid_thw is not None, ( "mrope embedding type requires multi-modal input mapper " "returns 'image_grid_thw' or 'video_grid_thw'.") + second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None) hf_config = self.runner.model_config.hf_config @@ -724,7 +725,7 @@ def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, hf_config, image_grid_thw=image_grid_thw, video_grid_thw=video_grid_thw, - second_per_grid_ts=None, + second_per_grid_ts=second_per_grid_ts, context_len=inter_data.context_lens[seq_idx], seq_len=inter_data.seq_lens[seq_idx], )