From 5cb7094dbb3c45226f6bfe4b1f6641fea9b54d37 Mon Sep 17 00:00:00 2001
From: Cedar
Date: Fri, 21 Feb 2025 13:52:33 -0800
Subject: [PATCH] final touches and enable ci

---
 .github/workflows/pkgci_shark_ai.yml            | 48 ++++++++++++++++++
 .../llm/shortfin/conftest.py                    | 17 +++++++
 .../llm/shortfin/direct_to_batcher_test.py      | 49 ++++++-------------
 3 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/pkgci_shark_ai.yml b/.github/workflows/pkgci_shark_ai.yml
index 9154b7644..1fe3d271c 100644
--- a/.github/workflows/pkgci_shark_ai.yml
+++ b/.github/workflows/pkgci_shark_ai.yml
@@ -66,6 +66,54 @@ jobs:
           name: smoke-test-${{ matrix.name }}
           path: smoke-test-${{ matrix.name }}.xml
 
+
+  direct_to_batcher_test:
+    name: "Direct to Batcher Test (${{ matrix.name }})"
+    runs-on: ${{ matrix.runs-on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: cpu
+            runs-on: azure-cpubuilder-linux-scale
+            test_device: cpu
+            python-version: 3.11
+          - name: amdgpu_rocm_mi300_gfx942
+            runs-on: linux-mi300-1gpu-ossci
+            test_device: gfx942
+            python-version: 3.11
+    defaults:
+      run:
+        shell: bash
+    env:
+      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
+      VENV_DIR: ${{ github.workspace }}/.venv
+    steps:
+      - name: Run rocminfo
+        if: contains(matrix.test_device, 'gfx')
+        run: rocminfo
+      - name: "Checkout Code"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: "Set up environment and install PkgCI Artifacts"
+        uses: ./.github/actions/pkgci-setup
+        with:
+          python-version: ${{matrix.python-version}}
+          artifact-run-id: ${{ inputs.artifact_run_id }}
+      - name: Run Direct-to-batcher Test
+        run: |
+          source ${VENV_DIR}/bin/activate
+          pytest -v -s --test_device=${{ matrix.test_device }} \
+            --junitxml=direct-to-batcher-test-${{ matrix.name }}.xml \
+            app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py \
+            --log-cli-level=INFO
+      - name: Upload Test Results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: direct-to-batcher-test-${{ matrix.name }}
+          path: direct-to-batcher-test-${{ matrix.name }}.xml
+
+
   integration_test:
     name: "Integration Test (${{ matrix.name }})"
     runs-on: ${{ matrix.runs-on }}
diff --git a/app_tests/integration_tests/llm/shortfin/conftest.py b/app_tests/integration_tests/llm/shortfin/conftest.py
index 7996a1169..74b246556 100644
--- a/app_tests/integration_tests/llm/shortfin/conftest.py
+++ b/app_tests/integration_tests/llm/shortfin/conftest.py
@@ -78,6 +78,23 @@ def server(model_artifacts, request):
     process.wait()
 
 
+@pytest.fixture(scope="module")
+def generate_service(model_artifacts, request):
+    """Starts and manages the test server."""
+    model_config = model_artifacts.model_config
+
+    server_config = ServerConfig(
+        artifacts=model_artifacts,
+        device_settings=model_config.device_settings,
+        prefix_sharing_algorithm=request.param.get("prefix_sharing", "none"),
+    )
+
+    server_instance = ServerInstance(server_config)
+    server_instance.port = 0
+    with server_instance.start_service_only() as gs:
+        yield gs
+
+
 @pytest.fixture(scope="module")
 def encoded_prompt(model_artifacts: ModelArtifacts, request) -> list[int]:
     tokenizer = Tokenizer.from_file(str(model_artifacts.tokenizer_path))
diff --git a/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py b/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py
index 606e21d32..8218c041d 100644
--- a/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py
+++ b/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py
@@ -12,29 +12,13 @@
 from shortfin_apps.llm.components.messages import InferencePhase, InferenceExecRequest
 
 
-@pytest.fixture
-def processor():
-    return ModelProcessor(base_dir="/tmp/model_management")
-
-
-@pytest.fixture
-def model_config():
-    config = TEST_MODELS["tinystories_llama2_25m"]
-    config.device_settings = CPU
-    return config
-
-
-@pytest.fixture
-def server_instance(processor, model_config):
-    artifacts = processor.process_model(model_config)
-    sconf = ServerConfig(
-        artifacts=artifacts,
-        device_settings=CPU,
-        prefix_sharing_algorithm="none",
-    )
-    sinst = ServerInstance(sconf)
-    sinst.port = 0
-    return sinst
+pytestmark = pytest.mark.parametrize(
+    "model_artifacts,generate_service",
+    [
+        ["tinystories_llama2_25m", {"prefix_sharing": "none"}],
+    ],
+    indirect=True,
+)
 
 
 class BatchConsistencyTestProcess(sf.Process):
@@ -97,7 +81,7 @@ async def run(self):
                 ), f"Inconsistent results between batch sizes {self.batch_sizes[0]} and {batch_size}"
 
 
-def test_batch_and_nobatch_consistency(server_instance):
+def test_batch_and_nobatch_consistency(model_artifacts, generate_service):
     """
     Test that requests produce identical results regardless of batch size.
 
@@ -107,12 +91,11 @@ def test_batch_and_nobatch_consistency(model_artifacts, generate_service):
     - improper seq_len / current_position handling in service.py
     - improper masking in sharktank
     """
-    with server_instance.start_service_only() as generate_service:
-        # Create and run the test process
-        test_process = BatchConsistencyTestProcess(
-            generate_service,
-            input_tokens=[1, 2, 3, 4],
-            batch_sizes=[1, 2, 3, 4],
-            max_response_length=3,
-        )
-        test_process.launch()
+    # Create and run the test process
+    test_process = BatchConsistencyTestProcess(
+        generate_service,
+        input_tokens=[1, 2, 3, 4],
+        batch_sizes=[1, 2, 3, 4],
+        max_response_length=3,
+    )
+    test_process.launch()