diff --git a/vllm/utils.py b/vllm/utils.py index 26140e15636a4..2781eceb7ba98 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -17,6 +17,7 @@ Hashable, List, Optional, OrderedDict, Tuple, TypeVar, Union) +import numpy as np import psutil import torch @@ -501,11 +502,6 @@ def str_to_int_tuple(s: str) -> Tuple[int, ...]: f"(e.g., 1, 2, 3). Given input: {s}") from e -def pad_to_max_length(x: List[int], max_len: int, pad: int) -> List[int]: - assert len(x) <= max_len - return x + [pad] * (max_len - len(x)) - - def make_tensor_with_pad( x: List[List[int]], max_len: int, @@ -518,7 +514,10 @@ def make_tensor_with_pad( The padding is applied to the end of each inner list until it reaches `max_len`. """ - padded_x = [pad_to_max_length(x_i, max_len, pad) for x_i in x] + padded_x = np.zeros([len(x), max_len], dtype=np.int32) + pad + for ind, blocktb in enumerate(x): + assert len(blocktb) <= max_len + padded_x[ind, :len(blocktb)] = blocktb return torch.tensor(padded_x, dtype=dtype, device=device)