diff --git a/.github/workflows/bench_test.yml b/.github/workflows/bench_test.yml index b5ec057c..6cef48c3 100644 --- a/.github/workflows/bench_test.yml +++ b/.github/workflows/bench_test.yml @@ -1,9 +1,6 @@ name: bench_test on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/e2e_test.yml b/.github/workflows/e2e_test.yml index 9ef05995..4370d4ed 100644 --- a/.github/workflows/e2e_test.yml +++ b/.github/workflows/e2e_test.yml @@ -1,9 +1,6 @@ name: e2e_test on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/migration_test.yml b/.github/workflows/migration_test.yml index 92e03af6..bb6cd3c6 100644 --- a/.github/workflows/migration_test.yml +++ b/.github/workflows/migration_test.yml @@ -1,9 +1,6 @@ name: migration_test on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/offline_inference.yml b/.github/workflows/offline_inference.yml index 8b3c62ea..2d2501b5 100644 --- a/.github/workflows/offline_inference.yml +++ b/.github/workflows/offline_inference.yml @@ -1,9 +1,6 @@ name: offline_inference on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index a3f123ab..e81bdf17 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,9 +1,6 @@ name: pylint on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml index e5540385..864989c4 100644 --- a/.github/workflows/unit_test.yml +++ b/.github/workflows/unit_test.yml @@ -1,9 +1,6 @@ name: unit_test on: - push: - branches: - - main pull_request: branches: - main diff --git a/.github/workflows/whl.yml b/.github/workflows/whl_build.yml similarity index 93% rename from .github/workflows/whl.yml rename to .github/workflows/whl_build.yml index f4cba196..ced47c3c 100644 --- a/.github/workflows/whl.yml +++ b/.github/workflows/whl_build.yml @@ -1,9 +1,6 @@ name: whl_build on: - push: - branches: - - main pull_request: branches: - main diff --git a/configs/base.yml b/configs/base.yml index b06ce791..4b21fa67 100644 --- a/configs/base.yml +++ b/configs/base.yml @@ -1,6 +1,7 @@ SERVER: HOST: '127.0.0.1' PORT: 37000 + QUEUE_TYPE: "rayqueue" RAY: RAY_CLUSTER_PORT: 30037 diff --git a/llumnix/config/default.py b/llumnix/config/default.py index 9b1d5319..dfa853f7 100644 --- a/llumnix/config/default.py +++ b/llumnix/config/default.py @@ -26,6 +26,8 @@ _C.SERVER.HOST = "localhost" # Port number for the server _C.SERVER.PORT = 8000 +# Queue type for request output queue +_C.SERVER.QUEUE_TYPE = "rayqueue" # Port number for the request output queue _C.SERVER.REQUEST_OUTPUT_QUEUE_PORT = 1234 # Path to SSL key file for secure connections diff --git a/llumnix/entrypoints/vllm/api_server.py b/llumnix/entrypoints/vllm/api_server.py index bee8b0d8..f369e125 100644 --- a/llumnix/entrypoints/vllm/api_server.py +++ b/llumnix/entrypoints/vllm/api_server.py @@ -250,7 +250,8 @@ def add_argument(self, *args, **kwargs): parser.add_argument('--disable-log-requests-server', action='store_true', help='disable logging requests in server') parser.add_argument("--ray-cluster-port", type=int) parser.add_argument('--launch-ray-cluster', action='store_true', help='if launch ray cluster in api server') - parser.add_argument("--request-output-queue-port", type=int) + parser.add_argument("--queue-type", type=str, choices=['rayqueue', 'zmq'], help='queue type for request output queue') + parser.add_argument("--request-output-queue-port", type=int, help='port for zeromq') parser.add_argument("--config-file", help="path to config file") parser = EngineManagerArgs.add_cli_args(parser)