Commit 5cb7094

final touches and enable ci
renxida committed Feb 21, 2025
1 parent 889140f commit 5cb7094
Showing 3 changed files with 81 additions and 33 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/pkgci_shark_ai.yml
@@ -66,6 +66,54 @@ jobs:
           name: smoke-test-${{ matrix.name }}
           path: smoke-test-${{ matrix.name }}.xml
 
+
+  direct_to_batcher_test:
+    name: "Direct to Batcher Test (${{ matrix.name }})"
+    runs-on: ${{ matrix.runs-on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: cpu
+            runs-on: azure-cpubuilder-linux-scale
+            test_device: cpu
+            python-version: 3.11
+          - name: amdgpu_rocm_mi300_gfx942
+            runs-on: linux-mi300-1gpu-ossci
+            test_device: gfx942
+            python-version: 3.11
+    defaults:
+      run:
+        shell: bash
+    env:
+      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
+      VENV_DIR: ${{ github.workspace }}/.venv
+    steps:
+      - name: Run rocminfo
+        if: contains(matrix.test_device, 'gfx')
+        run: rocminfo
+      - name: "Checkout Code"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: "Set up environment and install PkgCI Artifacts"
+        uses: ./.github/actions/pkgci-setup
+        with:
+          python-version: ${{matrix.python-version}}
+          artifact-run-id: ${{ inputs.artifact_run_id }}
+      - name: Run Direct-to-batcher Test
+        run: |
+          source ${VENV_DIR}/bin/activate
+          pytest -v -s --test_device=${{ matrix.test_device }} \
+            --junitxml=direct-to-batcher-test-${{ matrix.name }}.xml \
+            app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py \
+            --log-cli-level=INFO
+      - name: Upload Test Results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: direct-to-batcher-test-${{ matrix.name }}
+          path: direct-to-batcher-test-${{ matrix.name }}.xml
+
+
   integration_test:
     name: "Integration Test (${{ matrix.name }})"
     runs-on: ${{ matrix.runs-on }}
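
Note: the job forwards --test_device=${{ matrix.test_device }} straight to pytest. The option's registration is not part of this diff; as a minimal sketch of how such a flag is typically wired up in a root conftest.py (the names and default below are assumptions, not the repo's actual code):

    # Sketch only: hypothetical registration of the --test_device option.
    def pytest_addoption(parser):
        parser.addoption(
            "--test_device",
            action="store",
            default="cpu",
            help="Target device for integration tests, e.g. cpu or gfx942.",
        )
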
17 changes: 17 additions & 0 deletions app_tests/integration_tests/llm/shortfin/conftest.py
@@ -78,6 +78,23 @@ def server(model_artifacts, request):
     process.wait()
 
 
+@pytest.fixture(scope="module")
+def generate_service(model_artifacts, request):
+    """Starts and manages the test server."""
+    model_config = model_artifacts.model_config
+
+    server_config = ServerConfig(
+        artifacts=model_artifacts,
+        device_settings=model_config.device_settings,
+        prefix_sharing_algorithm=request.param.get("prefix_sharing", "none"),
+    )
+
+    server_instance = ServerInstance(server_config)
+    server_instance.port = 0
+    with server_instance.start_service_only() as gs:
+        yield gs
+
+
 @pytest.fixture(scope="module")
 def encoded_prompt(model_artifacts: ModelArtifacts, request) -> list[int]:
     tokenizer = Tokenizer.from_file(str(model_artifacts.tokenizer_path))
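
Note: setting server_instance.port = 0 conventionally asks the OS for a free ephemeral port, which avoids collisions when several test modules run in parallel (assuming ServerInstance follows that convention). Because the fixture reads request.param, consuming tests must parametrize it with indirect=True; a minimal sketch (hypothetical test name; in the real module model_artifacts is parametrized alongside it, as the next file shows):

    import pytest

    # The dict below reaches the generate_service fixture as request.param.
    @pytest.mark.parametrize(
        "generate_service",
        [{"prefix_sharing": "none"}],
        indirect=True,
    )
    def test_service_comes_up(generate_service):
        # The service is live for the duration of the fixture's with-block.
        assert generate_service is not None
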
49 changes: 16 additions & 33 deletions app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py
@@ -12,29 +12,13 @@
 from shortfin_apps.llm.components.messages import InferencePhase, InferenceExecRequest
 
 
-@pytest.fixture
-def processor():
-    return ModelProcessor(base_dir="/tmp/model_management")
-
-
-@pytest.fixture
-def model_config():
-    config = TEST_MODELS["tinystories_llama2_25m"]
-    config.device_settings = CPU
-    return config
-
-
-@pytest.fixture
-def server_instance(processor, model_config):
-    artifacts = processor.process_model(model_config)
-    sconf = ServerConfig(
-        artifacts=artifacts,
-        device_settings=CPU,
-        prefix_sharing_algorithm="none",
-    )
-    sinst = ServerInstance(sconf)
-    sinst.port = 0
-    return sinst
+pytestmark = pytest.mark.parametrize(
+    "model_artifacts,generate_service",
+    [
+        ["tinystories_llama2_25m", {"prefix_sharing": "none"}],
+    ],
+    indirect=True,
+)
 
 
 class BatchConsistencyTestProcess(sf.Process):
@@ -97,7 +81,7 @@ async def run(self):
                 ), f"Inconsistent results between batch sizes {self.batch_sizes[0]} and {batch_size}"
 
 
-def test_batch_and_nobatch_consistency(server_instance):
+def test_batch_and_nobatch_consistency(model_artifacts, generate_service):
     """
     Test that requests produce identical results regardless of batch size.
@@ -107,12 +91,11 @@ def test_batch_and_nobatch_consistency(server_instance):
     - improper seq_len / current_position handling in service.py
     - improper masking in sharktank
     """
-    with server_instance.start_service_only() as generate_service:
-        # Create and run the test process
-        test_process = BatchConsistencyTestProcess(
-            generate_service,
-            input_tokens=[1, 2, 3, 4],
-            batch_sizes=[1, 2, 3, 4],
-            max_response_length=3,
-        )
-        test_process.launch()
+    # Create and run the test process
+    test_process = BatchConsistencyTestProcess(
+        generate_service,
+        input_tokens=[1, 2, 3, 4],
+        batch_sizes=[1, 2, 3, 4],
+        max_response_length=3,
+    )
+    test_process.launch()

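Note: the module-level pytestmark applies this parametrization to every test in the file, and indirect=True routes each value to the same-named fixture rather than to the test function: model_artifacts receives "tinystories_llama2_25m" and generate_service receives {"prefix_sharing": "none"} as request.param. A self-contained toy (hypothetical names) demonstrating the mechanism:

    import pytest

    @pytest.fixture
    def greeting(request):
        # With indirect=True, each parametrize value arrives as request.param.
        return f"hello, {request.param}"

    # Module-level mark: applies to every test below.
    pytestmark = pytest.mark.parametrize("greeting", ["world"], indirect=True)

    def test_greeting(greeting):
        assert greeting == "hello, world"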