Commit
fix ci
Signed-off-by: wangli <wangli858794774@gmail.com>
Potabk committed Feb 26, 2025
1 parent fcbcf88 commit 7ca9d2f
Showing 3 changed files with 12 additions and 13 deletions.
2 changes: 2 additions & 0 deletions benchmarks/backend_request_func.py
@@ -157,6 +157,7 @@ def get_model(pretrained_model_name_or_path: str) -> str:
         return model_path
     return pretrained_model_name_or_path
 
+
 def get_tokenizer(
     pretrained_model_name_or_path: str,
     tokenizer_mode: str = "auto",
@@ -188,6 +189,7 @@ def get_tokenizer(
         **kwargs,
     )
 
+
 ASYNC_REQUEST_FUNCS = {
     "vllm": async_request_openai_completions,
 }
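
Both additions in this file are blank lines, which look like PEP 8 spacing fixes (pycodestyle E302/E305: two blank lines around top-level definitions); the same rule would explain the blank line added before run_vllm_async in benchmark_throughput.py below. A minimal sketch with placeholder names, not the benchmark's own code:

    # Hypothetical module illustrating the two-blank-line rule (PEP 8).
    # With only one blank line in either spot, flake8 reports E302
    # (before the def) or E305 (before the module-level assignment).


    def get_model_stub(name: str) -> str:
        # stands in for get_model() in backend_request_func.py
        return name


    def get_tokenizer_stub(name: str) -> str:
        # two blank lines above this def satisfy E302
        return name


    # two blank lines above this assignment satisfy E305
    ASYNC_REQUEST_FUNCS_STUB = {"vllm": get_tokenizer_stub}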
17 changes: 8 additions & 9 deletions benchmarks/benchmark_serving.py
@@ -284,10 +284,9 @@ def sample_hf_requests(
     random_seed: int,
     fixed_output_len: Optional[int] = None,
 ) -> List[Tuple[str, str, int, Optional[Dict[str, Collection[str]]]]]:
-
     # Special case for vision_arena dataset
     if dataset_path == 'lmarena-ai/vision-arena-bench-v0.1' \
-        and dataset_subset is None:
+            and dataset_subset is None:
         assert dataset_split == "train"
         dataset = load_dataset(dataset_path,
                                name=dataset_subset,
@@ -303,8 +302,8 @@ def sample_hf_requests(
                                streaming=True)
     assert "conversations" in dataset.features, (
         "HF Dataset must have 'conversations' column.")
-    filter_func = lambda x: len(x["conversations"]) >= 2
-    filtered_dataset = dataset.shuffle(seed=random_seed).filter(filter_func)
+    filtered_dataset = dataset.shuffle(seed=random_seed).filter(
+        lambda x: len(x["conversations"]) >= 2, )
     sampled_requests: List[Tuple[str, int, int, Dict[str,
                                                      Collection[str]]]] = []
     for data in filtered_dataset:
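
This hunk reads like a fix for flake8's E731 ("do not assign a lambda expression, use a def"): the named filter_func lambda is inlined into the .filter() call. A minimal, self-contained sketch of the rule, with hypothetical data:

    # E731 fires on a named lambda; inlining it (the commit's choice)
    # or converting it to a def both pass the linter.
    rows = [{"conversations": ["q", "a"]}, {"conversations": ["q"]}]

    # flagged by E731:  keep_func = lambda x: len(x["conversations"]) >= 2
    kept = [x for x in rows if len(x["conversations"]) >= 2]

    def has_full_turn(x: dict) -> bool:
        # equivalent def form, also lint-clean
        return len(x["conversations"]) >= 2

    assert kept == list(filter(has_full_turn, rows))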
@@ -323,7 +322,7 @@ def sample_hf_requests(
             # Prune too short sequences.
             continue
         if fixed_output_len is None and \
-            (prompt_len > 1024 or prompt_len + output_len > 2048):
+                (prompt_len > 1024 or prompt_len + output_len > 2048):
             # Prune too long sequences.
             continue
 
@@ -342,7 +341,7 @@ def sample_hf_requests(
             }
         elif "image" in data and isinstance(data["image"], str):
             if (data["image"].startswith("http://") or \
-                data["image"].startswith("file://")):
+                    data["image"].startswith("file://")):
                 image_url = data["image"]
             else:
                 image_url = f"file://{data['image']}"
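
The indentation-only changes above (including the dataset_subset continuation in the first hunk) look like continuation-line fixes in the pycodestyle E129 family ("visually indented line with same indent as next logical line"): when a backslash-continued condition lines up with the body beneath it, the extra indent added here disambiguates the two. A sketch with hypothetical values:

    prompt_len, output_len, fixed_output_len = 1500, 600, None

    # Before: the continuation sat at the same indent as the body (E129).
    # After: one extra level separates the condition from the body.
    if fixed_output_len is None and \
            (prompt_len > 1024 or prompt_len + output_len > 2048):
        print("prune: sequence too long")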
@@ -962,8 +961,8 @@ def main(args: argparse.Namespace):
     )
 
     # Traffic
-    result_json["request_rate"] = (args.request_rate if args.request_rate
-                                   < float("inf") else "inf")
+    result_json["request_rate"] = (
+        args.request_rate if args.request_rate < float("inf") else "inf")
     result_json["burstiness"] = args.burstiness
     result_json["max_concurrency"] = args.max_concurrency
 
@@ -974,7 +973,7 @@ def main(args: argparse.Namespace):
         base_model_id = model_id.split("/")[-1]
         max_concurrency_str = (f"-concurrency{args.max_concurrency}"
                                if args.max_concurrency is not None else "")
-        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  #noqa
+        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  # noqa
         if args.result_filename:
             file_name = args.result_filename
         if args.result_dir:
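
The last two hunks track formatter conventions rather than behavior: the ternary is rewrapped so the < comparison stays on one line (avoiding a break around a binary operator), and the suppression comment gains the conventional "# noqa" spacing that pycodestyle's inline-comment checks (the E262/E265 family) expect. A runnable sketch of the rewrapped form, with a hypothetical Namespace standing in for parsed arguments:

    import argparse

    args = argparse.Namespace(request_rate=float("inf"))
    result_json = {}
    # Rewrapped ternary: the comparison no longer straddles a line break.
    result_json["request_rate"] = (
        args.request_rate if args.request_rate < float("inf") else "inf")
    print(result_json)  # {'request_rate': 'inf'}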
Expand Down
6 changes: 2 additions & 4 deletions benchmarks/benchmark_throughput.py
@@ -8,12 +8,10 @@
 from functools import cache
 from typing import Dict, List, Optional, Tuple
 
-import torch
 import uvloop
 from PIL import Image
 from tqdm import tqdm
-from transformers import (AutoModelForCausalLM, AutoTokenizer,
-                          PreTrainedTokenizerBase)
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import \
     build_async_engine_client_from_engine_args
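
Here the deletions drop imports the file never uses, torch and AutoModelForCausalLM, which flake8/ruff report as F401 ("imported but unused") and which is a plausible cause of the CI failure this commit fixes. A minimal sketch of the surviving import surface (the commented-out model id is hypothetical):

    from transformers import AutoTokenizer, PreTrainedTokenizerBase

    def load_tokenizer(name: str) -> PreTrainedTokenizerBase:
        # Only the tokenizer half of transformers is exercised by the
        # throughput benchmark, so the model-class import could go.
        return AutoTokenizer.from_pretrained(name)

    # tok = load_tokenizer("facebook/opt-125m")  # hypothetical model id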
@@ -89,7 +87,6 @@ def get_random_lora_request(
 
 def sample_requests(tokenizer: PreTrainedTokenizerBase,
                     args: argparse.Namespace) -> List[SampleRequest]:
-
     dataset_path: str = args.dataset_path
     num_requests: int = args.num_prompts
     fixed_output_len: Optional[int] = args.output_len
Expand Down Expand Up @@ -215,6 +212,7 @@ def run_vllm(
end = time.perf_counter()
return end - start


async def run_vllm_async(
requests: List[SampleRequest],
n: int,
