Skip to content

Commit

Permalink
This PR refactors the model runner to decouple it from the cla…
Browse files Browse the repository at this point in the history
…sses specifically designed for GPU.

Signed-off-by: Shanshan Shen <467638484@qq.com>
  • Loading branch information
shen-shanshan committed Feb 5, 2025
1 parent d5e7756 commit 8b45b23
Show file tree
Hide file tree
Showing 4 changed files with 1,104 additions and 328 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,4 @@ cython_debug/
# PyPI configuration file
.pypirc

kernel_meta/
7 changes: 7 additions & 0 deletions vllm_ascend/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,13 @@ class AscendMetadataBuilder(CommonMetadataBuilder[AscendMetadata]):

_metadata_cls = AscendMetadata

def __init__(self, input_builder: "ModelInputForNPUBuilder"):
    """Bind this metadata builder to the given model-input builder.

    Keeps a reference to ``input_builder`` and copies its ``runner``,
    ``sliding_window`` and ``block_size`` attributes onto this instance,
    then calls ``self.prepare()``.

    Args:
        input_builder: The builder this instance is created for. It must
            expose ``runner``, ``sliding_window`` and ``block_size``.
    """
    source = input_builder
    self.input_builder = source

    # Mirror the fields of the input builder that this class reads, so the
    # rest of the methods can use them directly from ``self``.
    self.runner = source.runner
    self.sliding_window = source.sliding_window
    self.block_size = source.block_size

    # NOTE(review): ``prepare`` is not defined in the visible part of this
    # file; presumably it is inherited from ``CommonMetadataBuilder`` and
    # resets the per-batch state -- confirm. It is called last, so every
    # attribute above is already set when it runs.
    self.prepare()

def compute_npu_slot_indices(self, is_profile_run, slot_indices, seq_id,
seq_len, context_len, start_idx, block_size,
block_tables, max_query_len):
Expand Down
Loading

0 comments on commit 8b45b23

Please sign in to comment.