
Commit

update
ZeldaHuang committed Oct 15, 2024
1 parent 7be5998 commit 732a32b
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions llumnix/backends/vllm/executor.py
@@ -22,8 +22,8 @@
 from ray.util.placement_group import PlacementGroup
 
 from vllm.executor.executor_base import ExecutorBase
-from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync, RayWorkerWrapper, get_distributed_init_method,\
-    get_ip, get_vllm_instance_id, get_open_port
+from vllm.executor.ray_gpu_executor import RayGPUExecutor, RayGPUExecutorAsync, RayWorkerWrapper,\
+    get_distributed_init_method, get_ip, get_vllm_instance_id, get_open_port
 
 from vllm import envs
 from vllm.sequence import Logprob, SequenceOutput, SequenceGroupOutput, SamplerOutput, ExecuteModelRequest
@@ -166,10 +166,10 @@ async def execute_model_async(self, *args, **kwargs):
         self.last_inference_latency = (t1 - t0) * 1000
         return outputs
 
-class SimGPUExecutor(RayGPUExecutorAsync, ExecutorBase):
+class SimGPUExecutor(RayGPUExecutor):
     latency_mem: LatencyMemData = None
     def __init__(self, *args, **kwargs) -> None:
-        ExecutorBase.__init__(self, *args, **kwargs)
+        RayGPUExecutor.__init__(self, *args, **kwargs)
         self.last_inference_latency = 0
         self.migration_bandwidth = self.latency_mem.migration_bandwidth
         # TODO(ZeldaHuang): add swap bandwidth
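For orientation, below is a minimal sketch of how SimGPUExecutor reads after this commit, reconstructed from the two hunks above: the class now derives from the synchronous RayGPUExecutor and delegates construction to it instead of calling ExecutorBase.__init__ directly. The LatencyMemData import path and the comments are assumptions for illustration, not part of the diff.

# Sketch only, reconstructed from the diff above; not the full executor.py.
from vllm.executor.ray_gpu_executor import RayGPUExecutor

from llumnix.backends.profiling import LatencyMemData  # assumed import path

class SimGPUExecutor(RayGPUExecutor):
    # Class-level profiling data; presumably assigned before instantiation.
    latency_mem: LatencyMemData = None

    def __init__(self, *args, **kwargs) -> None:
        # After this commit, construction is delegated to RayGPUExecutor
        # rather than ExecutorBase.
        RayGPUExecutor.__init__(self, *args, **kwargs)
        self.last_inference_latency = 0
        self.migration_bandwidth = self.latency_mem.migration_bandwidth
        # TODO(ZeldaHuang): add swap bandwidth

Note that latency_mem defaults to None as a class attribute, so it presumably has to be set (e.g. SimGPUExecutor.latency_mem = ...) before the executor is constructed; otherwise the self.latency_mem.migration_bandwidth lookup in __init__ would fail.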
