Skip to content

Commit

Permalink
fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
KuilongCui committed Jan 23, 2025
1 parent bcce670 commit 2432b3b
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
2 changes: 2 additions & 0 deletions llumnix/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from llumnix.entrypoints.utils import LaunchMode


# All the default values of llumnix arguments are set in default.py. So all the arguments here are set to None for default.

class LlumnixArgumentParser(argparse.ArgumentParser):
def __init__(self, *args, **kwargs):
self.cur_namespace = "llumnix"
Expand Down
14 changes: 7 additions & 7 deletions llumnix/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,22 +152,22 @@ def _get_next_instance_args(self, instance_args) -> InstanceArgs:
assert not self.enablde_engine_pd_disagg, \
"Currently not support engine based pd-disaggregation in global launch mode."

config: InstanceArgs = copy.deepcopy(instance_args)
next_instance_args: InstanceArgs = copy.deepcopy(instance_args)
cur_num_prefill = len(self.global_scheduler.dispatch_scheduler.available_dispatch_instance_set)
cur_num_decode = len(self.global_scheduler.instance_id_set -
self.global_scheduler.dispatch_scheduler.available_dispatch_instance_set)
config.instance_type = self._get_next_instance_type(cur_num_prefill, cur_num_decode, self.pd_ratio)
return config
next_instance_args.instance_type = self._get_next_instance_type(cur_num_prefill, cur_num_decode, self.pd_ratio)
return next_instance_args

def _get_next_entrypoints_args(self, entrypoints_args: EntrypointsArgs) -> EntrypointsArgs:
config = copy.deepcopy(entrypoints_args)
next_entrypoints_args = copy.deepcopy(entrypoints_args)
if self.enable_port_increment:
config.port += self.port_offset
config.request_output_queue_port += self.port_offset
next_entrypoints_args.port += self.port_offset
next_entrypoints_args.request_output_queue_port += self.port_offset
self.port_offset += 1
if self.enable_port_offset_store:
put_actor_data_to_ray_internal_kv("manager", "port_offset", self.port_offset)
return config
return next_entrypoints_args

def init_server_and_instance(self, instance_id: str, entrypoints_args: EntrypointsArgs,
instance_args: InstanceArgs, engine_args, backend_type: BackendType,
Expand Down
8 changes: 4 additions & 4 deletions tests/unit_test/global_scheduler/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ async def test_init_server_and_get_instance_deployment_states_and_instance_and_c
@pytest.mark.parametrize("request_output_queue_type", ['rayqueue', 'zmq'])
async def test_auto_scale_up_loop_and_get_cluster_deployment(ray_env, request_output_queue_type):
manager, _, _, _, _ = init_manager_with_launch_mode(LaunchMode.GLOBAL, request_output_queue_type)
await asyncio.sleep(30.0)
await asyncio.sleep(60.0)

num_instances = manager.scale_up([], [], [])
assert num_instances == 4
Expand All @@ -410,7 +410,7 @@ async def test_auto_scale_up_loop_and_get_cluster_deployment(ray_env, request_ou
assert len(instance_ids) == 4
await manager.clear_instance_ray_resources(instance_ids[0])
await manager.clear_instance_ray_resources(instance_ids[1])
await asyncio.sleep(30.0)
await asyncio.sleep(60.0)

num_instances = manager.scale_up([], [], [])
assert num_instances == 4
Expand All @@ -421,7 +421,7 @@ async def test_auto_scale_up_loop_and_get_cluster_deployment(ray_env, request_ou
@pytest.mark.parametrize("request_output_queue_type", ['rayqueue', 'zmq'])
async def test_check_deployment_states_loop_and_auto_scale_up_loop(ray_env, request_output_queue_type):
manager, _, _, _, _ = init_manager_with_launch_mode(LaunchMode.GLOBAL, request_output_queue_type)
await asyncio.sleep(30.0)
await asyncio.sleep(60.0)

num_instances = manager.scale_up([], [], [])
assert num_instances == 4
Expand Down Expand Up @@ -466,7 +466,7 @@ def test_pd_disagg_gloal_launch_instance_type():
async def test_pd_disagg_gloal_launch_deployment_and_auto_scale_up_loop(ray_env, request_output_queue_type):
manager, _, _, _, _ = init_manager_with_launch_mode(LaunchMode.GLOBAL, request_output_queue_type,
enable_pd_disagg=True, pd_ratio="1:1")
await asyncio.sleep(30.0)
await asyncio.sleep(60.0)

num_instances = manager.scale_up([], [], [])
assert num_instances == 4
Expand Down

0 comments on commit 2432b3b

Please sign in to comment.