diff --git a/app_tests/integration_tests/llm/shortfin/service_only/test_batch_size_consistency.py b/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py similarity index 92% rename from app_tests/integration_tests/llm/shortfin/service_only/test_batch_size_consistency.py rename to app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py index 53e024b32..606e21d32 100644 --- a/app_tests/integration_tests/llm/shortfin/service_only/test_batch_size_consistency.py +++ b/app_tests/integration_tests/llm/shortfin/direct_to_batcher_test.py @@ -98,7 +98,15 @@ async def run(self): def test_batch_and_nobatch_consistency(server_instance): - """Test that requests produce identical results regardless of batch size.""" + """ + Test that requests produce identical results regardless of batch size. + + If this test fails, it means that changing the batch size changes the generation results. + + Look for kvcache corruption due to + - improper seq_len / current_position handling in service.py + - improper masking in sharktank + """ with server_instance.start_service_only() as generate_service: # Create and run the test process test_process = BatchConsistencyTestProcess(