From 3e530343ec598d205f89a3d6f7c848b5dd1b7250 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Wed, 12 Feb 2025 18:44:20 -0600 Subject: [PATCH 01/35] added print debugging Signed-off-by: Elias Joseph --- app_tests/integration_tests/llm/sglang/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py index 9f2662639..80c5e55fb 100644 --- a/app_tests/integration_tests/llm/sglang/conftest.py +++ b/app_tests/integration_tests/llm/sglang/conftest.py @@ -27,6 +27,7 @@ def register_shortfin_backend(port): + print("------HERE------" + str(port)) backend = sgl.Shortfin( chat_template=get_chat_template("llama-3-instruct"), base_url=f"http://localhost:{port}", From 428686fdb4be6d26725c6a44dd41623230e8ff8d Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 16:17:02 -0600 Subject: [PATCH 02/35] shortened tests for faster iterations Signed-off-by: Elias Joseph --- .../llm/sglang/sglang_frontend_test.py | 386 +++++++++--------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py index 1b2663a18..3fbc38cb6 100644 --- a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py +++ b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py @@ -132,196 +132,196 @@ def test_multi_turn_qa(model_artifacts, start_server, load_comparison_model): logger.info("Similarity passed.") -@pytest.mark.parametrize( - "model_artifacts,start_server,load_comparison_model", - [ - ( - {"device_settings": DEVICE_SETTINGS}, - {"device_settings": DEVICE_SETTINGS}, - None, - ) - ], - indirect=True, -) -def test_stream_multi_turn_qa(model_artifacts, start_server, load_comparison_model): - server, port = start_server - register_shortfin_backend(port) - - def clean_message(message: str): - """Remove chat tags from message before comparison. - - Args: - message (str): Message to clean. - - Returns: - str: Message without tags (i.e. <|start_header_id|>) - """ - pattern = r"<\|.*?\|>" - return re.sub(pattern, "", message) - - model = load_comparison_model - question_1 = "Name the capital city of the USA." - question_2 = "The Smithsonian is in this location." - expected_answer_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" - expected_answer_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" - - logger.info("Testing multi-turn Q&A run w/ stream...") - state = multi_turn_question.run( - question_1=question_1, - question_2=question_2, - stream=True, - ) - messages = "" - for chunk in state.text_iter(): - messages += chunk - logger.info("Received messages from multi-turn call.") - - logger.info("Computing similarity between expectation and result") - expected_result = f"user: {question_1}\nassistant: {expected_answer_1}\nuser: {question_2}\nassistant: {expected_answer_2}" - cleaned_messages = clean_message(messages) - score = compute_similarity(model, cleaned_messages, expected_result) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_result} and {messages}:\n SCORE: {score}" - ) - logger.info("Similarity passed.") - - -@pytest.mark.parametrize( - "model_artifacts,start_server,load_comparison_model", - [ - ( - {"device_settings": DEVICE_SETTINGS}, - {"device_settings": DEVICE_SETTINGS}, - None, - ) - ], - indirect=True, -) -def test_batch_multi_turn_qa(model_artifacts, start_server, load_comparison_model): - server, port = start_server - register_shortfin_backend(port) - model = load_comparison_model - - question_1_1 = "Name the capital city of the USA." - question_1_2 = "The Smithsonian is in this location." - expected_answer_1_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" - expected_answer_1_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" - - question_2_1 = "Name the largest city in the USA." - question_2_2 = "The Empire State Building is in this location." - expected_answer_2_1 = "The largest city in the USA is New York City, with a population of over 8.4 million people, according to the United States Census Bureau (2020 estimates).assistant\n\nHowever, I should note that the largest city in the" - expected_answer_2_2 = "That's correct, the iconic Empire State Building is located in Midtown Manhattan, New York City. It's one of the most recognizable landmarks in the world and a symbol of the city's grandeur and history.assistant\n\nAnd, by" - - logger.info("Testing batch multi-turn Q&A run...") - states = multi_turn_question.run_batch( - [ - { - "question_1": question_1_1, - "question_2": question_1_2, - }, - { - "question_1": question_2_1, - "question_2": question_2_2, - }, - ] - ) - - first_qa = states[0] - second_qa = states[1] - - first_qa_messages = first_qa.messages() - second_qa_messages = second_qa.messages() - - logger.info("Testing first batch of messages...") - assert first_qa_messages[0] == { - "role": "user", - "content": question_1_1, - } - - assert first_qa_messages[1]["role"] == "assistant" - first_answer = first_qa_messages[1]["content"] - expected_answer = expected_answer_1_1 - score = compute_similarity(model, expected_answer, first_answer) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" - ) - - assert first_qa_messages[2] == { - "role": "user", - "content": question_1_2, - } - first_qa_messages[3]["role"] = "assistant" - second_answer = first_qa_messages[3]["content"] - expected_answer = expected_answer_1_2 - score = compute_similarity(model, expected_answer, second_answer) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" - ) - logger.info("First batch passed.") - - logger.info("Testing second batch of messages...") - assert second_qa_messages[0] == { - "role": "user", - "content": question_2_1, - } - - assert second_qa_messages[1]["role"] == "assistant" - first_answer = second_qa_messages[1]["content"] - expected_answer = expected_answer_2_1 - score = compute_similarity(model, expected_answer, first_answer) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" - ) - - assert second_qa_messages[2] == { - "role": "user", - "content": question_2_2, - } - second_qa_messages[3]["role"] = "assistant" - second_answer = second_qa_messages[3]["content"] - expected_answer = expected_answer_2_2 - score = compute_similarity(model, expected_answer, second_answer) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" - ) - logger.info("Second batch passed.") - - -@pytest.mark.parametrize( - "model_artifacts,start_server,load_comparison_model", - [ - ( - {"device_settings": DEVICE_SETTINGS}, - {"device_settings": DEVICE_SETTINGS}, - None, - ) - ], - indirect=True, -) -def test_fork(model_artifacts, start_server, load_comparison_model): - server, port = start_server - register_shortfin_backend(port) - model = load_comparison_model - - logger.info("Testing fork...") - state = tip_suggestion.run() - result = state.text() - logger.info("Fork response received.") - - logger.info("Computing similarity...") - expected_answer = """Here are two tips for staying healthy: 1. Balanced Diet. 2. Regular Exercise. - Tip 1:A balanced diet is essential for maintaining good health. It involves consuming a variety of foods from different food groups, including fruits, vegetables, whole grains, lean proteins, and healthy fats. A balanced diet provides the body with the necessary nutrients, vitamins, and - Tip 2:Regular exercise is essential for maintaining a healthy body. It helps to improve cardiovascular health, increase strength and flexibility, and boost the immune system. Regular physical activity can also reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer - In summary, a balanced diet and regular exercise are two of the most important tips for staying healthy. By following these tips, you can maintain a healthy body and reduce the risk of chronic diseases. - """ - score = compute_similarity(model, result, expected_answer) - if not score > ACCEPTED_THRESHOLD: - raise AccuracyValidationException( - f"Accuracy error between {expected_answer} and {result}:\n SCORE: {score}" - ) - logger.info("Similarity passed.") +# @pytest.mark.parametrize( +# "model_artifacts,start_server,load_comparison_model", +# [ +# ( +# {"device_settings": DEVICE_SETTINGS}, +# {"device_settings": DEVICE_SETTINGS}, +# None, +# ) +# ], +# indirect=True, +# ) +# def test_stream_multi_turn_qa(model_artifacts, start_server, load_comparison_model): +# server, port = start_server +# register_shortfin_backend(port) + +# def clean_message(message: str): +# """Remove chat tags from message before comparison. + +# Args: +# message (str): Message to clean. + +# Returns: +# str: Message without tags (i.e. <|start_header_id|>) +# """ +# pattern = r"<\|.*?\|>" +# return re.sub(pattern, "", message) + +# model = load_comparison_model +# question_1 = "Name the capital city of the USA." +# question_2 = "The Smithsonian is in this location." +# expected_answer_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" +# expected_answer_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" + +# logger.info("Testing multi-turn Q&A run w/ stream...") +# state = multi_turn_question.run( +# question_1=question_1, +# question_2=question_2, +# stream=True, +# ) +# messages = "" +# for chunk in state.text_iter(): +# messages += chunk +# logger.info("Received messages from multi-turn call.") + +# logger.info("Computing similarity between expectation and result") +# expected_result = f"user: {question_1}\nassistant: {expected_answer_1}\nuser: {question_2}\nassistant: {expected_answer_2}" +# cleaned_messages = clean_message(messages) +# score = compute_similarity(model, cleaned_messages, expected_result) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_result} and {messages}:\n SCORE: {score}" +# ) +# logger.info("Similarity passed.") + + +# @pytest.mark.parametrize( +# "model_artifacts,start_server,load_comparison_model", +# [ +# ( +# {"device_settings": DEVICE_SETTINGS}, +# {"device_settings": DEVICE_SETTINGS}, +# None, +# ) +# ], +# indirect=True, +# ) +# def test_batch_multi_turn_qa(model_artifacts, start_server, load_comparison_model): +# server, port = start_server +# register_shortfin_backend(port) +# model = load_comparison_model + +# question_1_1 = "Name the capital city of the USA." +# question_1_2 = "The Smithsonian is in this location." +# expected_answer_1_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" +# expected_answer_1_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" + +# question_2_1 = "Name the largest city in the USA." +# question_2_2 = "The Empire State Building is in this location." +# expected_answer_2_1 = "The largest city in the USA is New York City, with a population of over 8.4 million people, according to the United States Census Bureau (2020 estimates).assistant\n\nHowever, I should note that the largest city in the" +# expected_answer_2_2 = "That's correct, the iconic Empire State Building is located in Midtown Manhattan, New York City. It's one of the most recognizable landmarks in the world and a symbol of the city's grandeur and history.assistant\n\nAnd, by" + +# logger.info("Testing batch multi-turn Q&A run...") +# states = multi_turn_question.run_batch( +# [ +# { +# "question_1": question_1_1, +# "question_2": question_1_2, +# }, +# { +# "question_1": question_2_1, +# "question_2": question_2_2, +# }, +# ] +# ) + +# first_qa = states[0] +# second_qa = states[1] + +# first_qa_messages = first_qa.messages() +# second_qa_messages = second_qa.messages() + +# logger.info("Testing first batch of messages...") +# assert first_qa_messages[0] == { +# "role": "user", +# "content": question_1_1, +# } + +# assert first_qa_messages[1]["role"] == "assistant" +# first_answer = first_qa_messages[1]["content"] +# expected_answer = expected_answer_1_1 +# score = compute_similarity(model, expected_answer, first_answer) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" +# ) + +# assert first_qa_messages[2] == { +# "role": "user", +# "content": question_1_2, +# } +# first_qa_messages[3]["role"] = "assistant" +# second_answer = first_qa_messages[3]["content"] +# expected_answer = expected_answer_1_2 +# score = compute_similarity(model, expected_answer, second_answer) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" +# ) +# logger.info("First batch passed.") + +# logger.info("Testing second batch of messages...") +# assert second_qa_messages[0] == { +# "role": "user", +# "content": question_2_1, +# } + +# assert second_qa_messages[1]["role"] == "assistant" +# first_answer = second_qa_messages[1]["content"] +# expected_answer = expected_answer_2_1 +# score = compute_similarity(model, expected_answer, first_answer) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" +# ) + +# assert second_qa_messages[2] == { +# "role": "user", +# "content": question_2_2, +# } +# second_qa_messages[3]["role"] = "assistant" +# second_answer = second_qa_messages[3]["content"] +# expected_answer = expected_answer_2_2 +# score = compute_similarity(model, expected_answer, second_answer) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" +# ) +# logger.info("Second batch passed.") + + +# @pytest.mark.parametrize( +# "model_artifacts,start_server,load_comparison_model", +# [ +# ( +# {"device_settings": DEVICE_SETTINGS}, +# {"device_settings": DEVICE_SETTINGS}, +# None, +# ) +# ], +# indirect=True, +# ) +# def test_fork(model_artifacts, start_server, load_comparison_model): +# server, port = start_server +# register_shortfin_backend(port) +# model = load_comparison_model + +# logger.info("Testing fork...") +# state = tip_suggestion.run() +# result = state.text() +# logger.info("Fork response received.") + +# logger.info("Computing similarity...") +# expected_answer = """Here are two tips for staying healthy: 1. Balanced Diet. 2. Regular Exercise. +# Tip 1:A balanced diet is essential for maintaining good health. It involves consuming a variety of foods from different food groups, including fruits, vegetables, whole grains, lean proteins, and healthy fats. A balanced diet provides the body with the necessary nutrients, vitamins, and +# Tip 2:Regular exercise is essential for maintaining a healthy body. It helps to improve cardiovascular health, increase strength and flexibility, and boost the immune system. Regular physical activity can also reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer +# In summary, a balanced diet and regular exercise are two of the most important tips for staying healthy. By following these tips, you can maintain a healthy body and reduce the risk of chronic diseases. +# """ +# score = compute_similarity(model, result, expected_answer) +# if not score > ACCEPTED_THRESHOLD: +# raise AccuracyValidationException( +# f"Accuracy error between {expected_answer} and {result}:\n SCORE: {score}" +# ) +# logger.info("Similarity passed.") From e4c95011a060147db63a21074b5a36add0da17b0 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 16:41:28 -0600 Subject: [PATCH 03/35] attempted fix Signed-off-by: Elias Joseph --- app_tests/integration_tests/llm/sglang/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py index 80c5e55fb..c510e1d24 100644 --- a/app_tests/integration_tests/llm/sglang/conftest.py +++ b/app_tests/integration_tests/llm/sglang/conftest.py @@ -55,7 +55,6 @@ def model_artifacts(request, tmp_path_factory): @pytest.fixture(scope="module") def start_server(request, model_artifacts): - os.environ["ROCR_VISIBLE_DEVICES"] = "0" device_settings = request.param["device_settings"] server_config = ServerConfig( From 1cde8385f2b7986e5aef838442bcc344fdb2aa43 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 16:47:13 -0600 Subject: [PATCH 04/35] fixed device issue Signed-off-by: Elias Joseph --- .../integration_tests/llm/sglang/conftest.py | 1 + .../llm/sglang/sglang_frontend_test.py | 386 +++++++++--------- 2 files changed, 194 insertions(+), 193 deletions(-) diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py index c510e1d24..80c5e55fb 100644 --- a/app_tests/integration_tests/llm/sglang/conftest.py +++ b/app_tests/integration_tests/llm/sglang/conftest.py @@ -55,6 +55,7 @@ def model_artifacts(request, tmp_path_factory): @pytest.fixture(scope="module") def start_server(request, model_artifacts): + os.environ["ROCR_VISIBLE_DEVICES"] = "0" device_settings = request.param["device_settings"] server_config = ServerConfig( diff --git a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py index 3fbc38cb6..1b2663a18 100644 --- a/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py +++ b/app_tests/integration_tests/llm/sglang/sglang_frontend_test.py @@ -132,196 +132,196 @@ def test_multi_turn_qa(model_artifacts, start_server, load_comparison_model): logger.info("Similarity passed.") -# @pytest.mark.parametrize( -# "model_artifacts,start_server,load_comparison_model", -# [ -# ( -# {"device_settings": DEVICE_SETTINGS}, -# {"device_settings": DEVICE_SETTINGS}, -# None, -# ) -# ], -# indirect=True, -# ) -# def test_stream_multi_turn_qa(model_artifacts, start_server, load_comparison_model): -# server, port = start_server -# register_shortfin_backend(port) - -# def clean_message(message: str): -# """Remove chat tags from message before comparison. - -# Args: -# message (str): Message to clean. - -# Returns: -# str: Message without tags (i.e. <|start_header_id|>) -# """ -# pattern = r"<\|.*?\|>" -# return re.sub(pattern, "", message) - -# model = load_comparison_model -# question_1 = "Name the capital city of the USA." -# question_2 = "The Smithsonian is in this location." -# expected_answer_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" -# expected_answer_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" - -# logger.info("Testing multi-turn Q&A run w/ stream...") -# state = multi_turn_question.run( -# question_1=question_1, -# question_2=question_2, -# stream=True, -# ) -# messages = "" -# for chunk in state.text_iter(): -# messages += chunk -# logger.info("Received messages from multi-turn call.") - -# logger.info("Computing similarity between expectation and result") -# expected_result = f"user: {question_1}\nassistant: {expected_answer_1}\nuser: {question_2}\nassistant: {expected_answer_2}" -# cleaned_messages = clean_message(messages) -# score = compute_similarity(model, cleaned_messages, expected_result) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_result} and {messages}:\n SCORE: {score}" -# ) -# logger.info("Similarity passed.") - - -# @pytest.mark.parametrize( -# "model_artifacts,start_server,load_comparison_model", -# [ -# ( -# {"device_settings": DEVICE_SETTINGS}, -# {"device_settings": DEVICE_SETTINGS}, -# None, -# ) -# ], -# indirect=True, -# ) -# def test_batch_multi_turn_qa(model_artifacts, start_server, load_comparison_model): -# server, port = start_server -# register_shortfin_backend(port) -# model = load_comparison_model - -# question_1_1 = "Name the capital city of the USA." -# question_1_2 = "The Smithsonian is in this location." -# expected_answer_1_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" -# expected_answer_1_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" - -# question_2_1 = "Name the largest city in the USA." -# question_2_2 = "The Empire State Building is in this location." -# expected_answer_2_1 = "The largest city in the USA is New York City, with a population of over 8.4 million people, according to the United States Census Bureau (2020 estimates).assistant\n\nHowever, I should note that the largest city in the" -# expected_answer_2_2 = "That's correct, the iconic Empire State Building is located in Midtown Manhattan, New York City. It's one of the most recognizable landmarks in the world and a symbol of the city's grandeur and history.assistant\n\nAnd, by" - -# logger.info("Testing batch multi-turn Q&A run...") -# states = multi_turn_question.run_batch( -# [ -# { -# "question_1": question_1_1, -# "question_2": question_1_2, -# }, -# { -# "question_1": question_2_1, -# "question_2": question_2_2, -# }, -# ] -# ) - -# first_qa = states[0] -# second_qa = states[1] - -# first_qa_messages = first_qa.messages() -# second_qa_messages = second_qa.messages() - -# logger.info("Testing first batch of messages...") -# assert first_qa_messages[0] == { -# "role": "user", -# "content": question_1_1, -# } - -# assert first_qa_messages[1]["role"] == "assistant" -# first_answer = first_qa_messages[1]["content"] -# expected_answer = expected_answer_1_1 -# score = compute_similarity(model, expected_answer, first_answer) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" -# ) - -# assert first_qa_messages[2] == { -# "role": "user", -# "content": question_1_2, -# } -# first_qa_messages[3]["role"] = "assistant" -# second_answer = first_qa_messages[3]["content"] -# expected_answer = expected_answer_1_2 -# score = compute_similarity(model, expected_answer, second_answer) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" -# ) -# logger.info("First batch passed.") - -# logger.info("Testing second batch of messages...") -# assert second_qa_messages[0] == { -# "role": "user", -# "content": question_2_1, -# } - -# assert second_qa_messages[1]["role"] == "assistant" -# first_answer = second_qa_messages[1]["content"] -# expected_answer = expected_answer_2_1 -# score = compute_similarity(model, expected_answer, first_answer) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" -# ) - -# assert second_qa_messages[2] == { -# "role": "user", -# "content": question_2_2, -# } -# second_qa_messages[3]["role"] = "assistant" -# second_answer = second_qa_messages[3]["content"] -# expected_answer = expected_answer_2_2 -# score = compute_similarity(model, expected_answer, second_answer) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" -# ) -# logger.info("Second batch passed.") - - -# @pytest.mark.parametrize( -# "model_artifacts,start_server,load_comparison_model", -# [ -# ( -# {"device_settings": DEVICE_SETTINGS}, -# {"device_settings": DEVICE_SETTINGS}, -# None, -# ) -# ], -# indirect=True, -# ) -# def test_fork(model_artifacts, start_server, load_comparison_model): -# server, port = start_server -# register_shortfin_backend(port) -# model = load_comparison_model - -# logger.info("Testing fork...") -# state = tip_suggestion.run() -# result = state.text() -# logger.info("Fork response received.") - -# logger.info("Computing similarity...") -# expected_answer = """Here are two tips for staying healthy: 1. Balanced Diet. 2. Regular Exercise. -# Tip 1:A balanced diet is essential for maintaining good health. It involves consuming a variety of foods from different food groups, including fruits, vegetables, whole grains, lean proteins, and healthy fats. A balanced diet provides the body with the necessary nutrients, vitamins, and -# Tip 2:Regular exercise is essential for maintaining a healthy body. It helps to improve cardiovascular health, increase strength and flexibility, and boost the immune system. Regular physical activity can also reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer -# In summary, a balanced diet and regular exercise are two of the most important tips for staying healthy. By following these tips, you can maintain a healthy body and reduce the risk of chronic diseases. -# """ -# score = compute_similarity(model, result, expected_answer) -# if not score > ACCEPTED_THRESHOLD: -# raise AccuracyValidationException( -# f"Accuracy error between {expected_answer} and {result}:\n SCORE: {score}" -# ) -# logger.info("Similarity passed.") +@pytest.mark.parametrize( + "model_artifacts,start_server,load_comparison_model", + [ + ( + {"device_settings": DEVICE_SETTINGS}, + {"device_settings": DEVICE_SETTINGS}, + None, + ) + ], + indirect=True, +) +def test_stream_multi_turn_qa(model_artifacts, start_server, load_comparison_model): + server, port = start_server + register_shortfin_backend(port) + + def clean_message(message: str): + """Remove chat tags from message before comparison. + + Args: + message (str): Message to clean. + + Returns: + str: Message without tags (i.e. <|start_header_id|>) + """ + pattern = r"<\|.*?\|>" + return re.sub(pattern, "", message) + + model = load_comparison_model + question_1 = "Name the capital city of the USA." + question_2 = "The Smithsonian is in this location." + expected_answer_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" + expected_answer_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" + + logger.info("Testing multi-turn Q&A run w/ stream...") + state = multi_turn_question.run( + question_1=question_1, + question_2=question_2, + stream=True, + ) + messages = "" + for chunk in state.text_iter(): + messages += chunk + logger.info("Received messages from multi-turn call.") + + logger.info("Computing similarity between expectation and result") + expected_result = f"user: {question_1}\nassistant: {expected_answer_1}\nuser: {question_2}\nassistant: {expected_answer_2}" + cleaned_messages = clean_message(messages) + score = compute_similarity(model, cleaned_messages, expected_result) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_result} and {messages}:\n SCORE: {score}" + ) + logger.info("Similarity passed.") + + +@pytest.mark.parametrize( + "model_artifacts,start_server,load_comparison_model", + [ + ( + {"device_settings": DEVICE_SETTINGS}, + {"device_settings": DEVICE_SETTINGS}, + None, + ) + ], + indirect=True, +) +def test_batch_multi_turn_qa(model_artifacts, start_server, load_comparison_model): + server, port = start_server + register_shortfin_backend(port) + model = load_comparison_model + + question_1_1 = "Name the capital city of the USA." + question_1_2 = "The Smithsonian is in this location." + expected_answer_1_1 = "The capital city of the United States of America is Washington, D.C. (short for District of Columbia).assistant\n\nWould you like to know more about Washington, D.C. or is there something else I can help you with?" + expected_answer_1_2 = "The Smithsonian Institution is indeed located in Washington, D.C. and is one of the world's largest and most comprehensive museums and research complexes. It was founded in 1846 and is named after British scientist James Smithson, who left a bequest to" + + question_2_1 = "Name the largest city in the USA." + question_2_2 = "The Empire State Building is in this location." + expected_answer_2_1 = "The largest city in the USA is New York City, with a population of over 8.4 million people, according to the United States Census Bureau (2020 estimates).assistant\n\nHowever, I should note that the largest city in the" + expected_answer_2_2 = "That's correct, the iconic Empire State Building is located in Midtown Manhattan, New York City. It's one of the most recognizable landmarks in the world and a symbol of the city's grandeur and history.assistant\n\nAnd, by" + + logger.info("Testing batch multi-turn Q&A run...") + states = multi_turn_question.run_batch( + [ + { + "question_1": question_1_1, + "question_2": question_1_2, + }, + { + "question_1": question_2_1, + "question_2": question_2_2, + }, + ] + ) + + first_qa = states[0] + second_qa = states[1] + + first_qa_messages = first_qa.messages() + second_qa_messages = second_qa.messages() + + logger.info("Testing first batch of messages...") + assert first_qa_messages[0] == { + "role": "user", + "content": question_1_1, + } + + assert first_qa_messages[1]["role"] == "assistant" + first_answer = first_qa_messages[1]["content"] + expected_answer = expected_answer_1_1 + score = compute_similarity(model, expected_answer, first_answer) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" + ) + + assert first_qa_messages[2] == { + "role": "user", + "content": question_1_2, + } + first_qa_messages[3]["role"] = "assistant" + second_answer = first_qa_messages[3]["content"] + expected_answer = expected_answer_1_2 + score = compute_similarity(model, expected_answer, second_answer) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" + ) + logger.info("First batch passed.") + + logger.info("Testing second batch of messages...") + assert second_qa_messages[0] == { + "role": "user", + "content": question_2_1, + } + + assert second_qa_messages[1]["role"] == "assistant" + first_answer = second_qa_messages[1]["content"] + expected_answer = expected_answer_2_1 + score = compute_similarity(model, expected_answer, first_answer) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_answer} and {first_answer}:\n SCORE: {score}" + ) + + assert second_qa_messages[2] == { + "role": "user", + "content": question_2_2, + } + second_qa_messages[3]["role"] = "assistant" + second_answer = second_qa_messages[3]["content"] + expected_answer = expected_answer_2_2 + score = compute_similarity(model, expected_answer, second_answer) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_answer} and {second_answer}:\n SCORE: {score}" + ) + logger.info("Second batch passed.") + + +@pytest.mark.parametrize( + "model_artifacts,start_server,load_comparison_model", + [ + ( + {"device_settings": DEVICE_SETTINGS}, + {"device_settings": DEVICE_SETTINGS}, + None, + ) + ], + indirect=True, +) +def test_fork(model_artifacts, start_server, load_comparison_model): + server, port = start_server + register_shortfin_backend(port) + model = load_comparison_model + + logger.info("Testing fork...") + state = tip_suggestion.run() + result = state.text() + logger.info("Fork response received.") + + logger.info("Computing similarity...") + expected_answer = """Here are two tips for staying healthy: 1. Balanced Diet. 2. Regular Exercise. + Tip 1:A balanced diet is essential for maintaining good health. It involves consuming a variety of foods from different food groups, including fruits, vegetables, whole grains, lean proteins, and healthy fats. A balanced diet provides the body with the necessary nutrients, vitamins, and + Tip 2:Regular exercise is essential for maintaining a healthy body. It helps to improve cardiovascular health, increase strength and flexibility, and boost the immune system. Regular physical activity can also reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer + In summary, a balanced diet and regular exercise are two of the most important tips for staying healthy. By following these tips, you can maintain a healthy body and reduce the risk of chronic diseases. + """ + score = compute_similarity(model, result, expected_answer) + if not score > ACCEPTED_THRESHOLD: + raise AccuracyValidationException( + f"Accuracy error between {expected_answer} and {result}:\n SCORE: {score}" + ) + logger.info("Similarity passed.") From fd02a974768f0bb9b9fd62eeb6d033a5a8b7e627 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 18:55:01 -0600 Subject: [PATCH 05/35] removed docker cleanup step Signed-off-by: Elias Joseph --- app_tests/integration_tests/llm/sglang/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py index 80c5e55fb..9f2662639 100644 --- a/app_tests/integration_tests/llm/sglang/conftest.py +++ b/app_tests/integration_tests/llm/sglang/conftest.py @@ -27,7 +27,6 @@ def register_shortfin_backend(port): - print("------HERE------" + str(port)) backend = sgl.Shortfin( chat_template=get_chat_template("llama-3-instruct"), base_url=f"http://localhost:{port}", From ab6b5274f9249e6ec1d2dcfda0209df390c042b3 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 19:26:27 -0600 Subject: [PATCH 06/35] moved big test back to old runner Signed-off-by: Elias Joseph --- .github/workflows/ci-llama-large-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index 43f0f27a0..a637c4138 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -28,7 +28,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: linux-mi300-1gpu-ossci + runs-on: llama-mi300x-1 defaults: run: shell: bash From 0fa650d945ed63f411dbee03ad637df6af6981b6 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 14 Feb 2025 19:32:08 -0600 Subject: [PATCH 07/35] added ci-sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 51a34db4f..90da4bb34 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -93,7 +93,7 @@ jobs: strategy: matrix: python-version: [3.11] - runs-on: [llama-mi300x-3] + runs-on: [linux-mi300-1gpu-ossci] fail-fast: false runs-on: ${{matrix.runs-on}} defaults: From c3bd7d21354525154ecb20994546d48d611a4560 Mon Sep 17 00:00:00 2001 From: saienduri Date: Fri, 14 Feb 2025 19:29:12 -0800 Subject: [PATCH 08/35] add hf token --- .github/workflows/ci-sharktank.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 90da4bb34..96ae2dffe 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -102,6 +102,7 @@ jobs: env: VENV_DIR: ${{ github.workspace }}/.venv HF_HOME: "/data/huggingface" + HF_TOKEN: ${{ secrets.HF_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 From a5a143eefbb866c55b7cd743bd1ce9599e4a9c2e Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Sun, 16 Feb 2025 19:11:10 -0600 Subject: [PATCH 09/35] removed sharktank workflow because I dont have a HF token --- .github/workflows/ci-sharktank.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 96ae2dffe..218ba5b4a 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -93,7 +93,7 @@ jobs: strategy: matrix: python-version: [3.11] - runs-on: [linux-mi300-1gpu-ossci] + runs-on: [llama-mi300x-3] fail-fast: false runs-on: ${{matrix.runs-on}} defaults: From 2d93939977fa48aaea51d8eba3342ae5fde7614e Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Sun, 16 Feb 2025 19:34:58 -0600 Subject: [PATCH 10/35] added back large test Signed-off-by: Elias Joseph --- .github/workflows/ci-llama-large-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index a637c4138..43f0f27a0 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -28,7 +28,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-1 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash From d98ca17409cb889255f91e196b4d371f119ad510 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Mon, 17 Feb 2025 02:15:25 -0600 Subject: [PATCH 11/35] reverted llama bench for merge Signed-off-by: Elias Joseph --- .github/workflows/ci-llama-large-tests.yaml | 2 +- .github/workflows/ci-sharktank.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index 43f0f27a0..a637c4138 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -28,7 +28,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: linux-mi300-1gpu-ossci + runs-on: llama-mi300x-1 defaults: run: shell: bash diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 218ba5b4a..51a34db4f 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -102,7 +102,6 @@ jobs: env: VENV_DIR: ${{ github.workspace }}/.venv HF_HOME: "/data/huggingface" - HF_TOKEN: ${{ secrets.HF_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 From 9127d592655b8e8d323e79521b16b69720d99d0f Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Mon, 17 Feb 2025 13:14:32 -0600 Subject: [PATCH 12/35] updated hf token Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 51a34db4f..2af4ce9e4 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -93,7 +93,7 @@ jobs: strategy: matrix: python-version: [3.11] - runs-on: [llama-mi300x-3] + runs-on: [linux-mi300-1gpu-ossci] fail-fast: false runs-on: ${{matrix.runs-on}} defaults: @@ -102,6 +102,7 @@ jobs: env: VENV_DIR: ${{ github.workspace }}/.venv HF_HOME: "/data/huggingface" + HF_TOKEN: ${{ secrets.HF_FLUX_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 From c9c761e1446036fc9ba54d0b632e1c68318a422a Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Mon, 17 Feb 2025 18:28:06 -0600 Subject: [PATCH 13/35] reverted shark-tank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 2af4ce9e4..ee41182f0 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -93,7 +93,7 @@ jobs: strategy: matrix: python-version: [3.11] - runs-on: [linux-mi300-1gpu-ossci] + runs-on: [llama-mi300x-3] fail-fast: false runs-on: ${{matrix.runs-on}} defaults: From 01bb4c1a7997661322879c5b6d77c1631178ede5 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Mon, 17 Feb 2025 18:32:52 -0600 Subject: [PATCH 14/35] addressed comments Signed-off-by: Elias Joseph --- .github/workflows/ci-llama-large-tests.yaml | 2 +- .github/workflows/ci-sharktank.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index a637c4138..43f0f27a0 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -28,7 +28,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-1 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index ee41182f0..51a34db4f 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -102,7 +102,6 @@ jobs: env: VENV_DIR: ${{ github.workspace }}/.venv HF_HOME: "/data/huggingface" - HF_TOKEN: ${{ secrets.HF_FLUX_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 From 922fc87bc2bb6a09c22ecfb392e4209024700bcb Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Wed, 19 Feb 2025 14:38:32 -0600 Subject: [PATCH 15/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 5 +++-- sharktank/conftest.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 51a34db4f..f01106dde 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -93,7 +93,7 @@ jobs: strategy: matrix: python-version: [3.11] - runs-on: [llama-mi300x-3] + runs-on: [linux-mi300-1gpu-ossci] fail-fast: false runs-on: ${{matrix.runs-on}} defaults: @@ -101,7 +101,8 @@ jobs: shell: bash env: VENV_DIR: ${{ github.workspace }}/.venv - HF_HOME: "/data/huggingface" + HF_HOME: "/shark-dev/data/huggingface" + HF_TOKEN: ${{ secrets.HF_FLUX_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/sharktank/conftest.py b/sharktank/conftest.py index 5aae72d41..57f8e72f4 100644 --- a/sharktank/conftest.py +++ b/sharktank/conftest.py @@ -191,25 +191,25 @@ def pytest_addoption(parser): parser.addoption( "--google-t5-v1-1-small-f32-model-path", type=Path, - default="/data/t5/small/google__t5-v1_1-small_f32.gguf", + default="/shark-dev/data/t5/small/google__t5-v1_1-small_f32.gguf", help="Google T5 v1.1 small float32 model path", ) parser.addoption( "--google-t5-v1-1-small-bf16-model-path", type=Path, - default="/data/t5/small/google__t5-v1_1-small_bf16.gguf", + default="/shark-dev/data/t5/small/google__t5-v1_1-small_bf16.gguf", help="Google T5 v1.1 small bfloat16 model path", ) parser.addoption( "--google-t5-v1-1-xxl-f32-model-path", type=Path, - default="/data/t5/xxl/google__t5-v1_1-xxl_f32.gguf", + default="shark-dev/data/t5/xxl/google__t5-v1_1-xxl_f32.gguf", help="Google T5 v1.1 XXL float32 model path", ) parser.addoption( "--google-t5-v1-1-xxl-bf16-model-path", type=Path, - default="/data/t5/xxl/google__t5-v1_1-xxl_bf16.gguf", + default="/shark-dev/data/t5/xxl/google__t5-v1_1-xxl_bf16.gguf", help="Google T5 v1.1 XXL bfloat16 model path", ) From fa0ba0ed49dca54284468be960a550e3bfa2064d Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Wed, 19 Feb 2025 14:42:44 -0600 Subject: [PATCH 16/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index f01106dde..ca9686bb3 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -101,7 +101,7 @@ jobs: shell: bash env: VENV_DIR: ${{ github.workspace }}/.venv - HF_HOME: "/shark-dev/data/huggingface" + HF_HOME: "/shark-cache/data/huggingface" HF_TOKEN: ${{ secrets.HF_FLUX_TOKEN }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 From 806dd2f7973b6f39d719cafd4900e3cb73d3f44a Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 13:57:58 -0600 Subject: [PATCH 17/35] moved quark artifacts to writable mount Signed-off-by: Elias Joseph --- sharktank/tests/models/llama/quark_parity_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index b45696fe4..294f7196b 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -18,7 +18,7 @@ class QuarkParityTest(unittest.TestCase): def setUp(self): super().setUp() - self.path_prefix = Path("/shark-dev/quark_test") + self.path_prefix = Path("/shark-cache/quark_test") @with_quark_data def test_compare_against_quark(self): From a1b12829de3c8b817db2b1d5af41ce0e2732d139 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 17:20:31 -0600 Subject: [PATCH 18/35] added permissions Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index ca9686bb3..770bce57d 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -136,6 +136,7 @@ jobs: # We would still want the separate flags as we may endup with data being # scattered on different CI machines. run: | + chmod -R 777 /shark-cache # Ensure full access source ${VENV_DIR}/bin/activate pytest \ -v \ From 0db2d98b316c002658a4e749210805d2c1076188 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 17:35:28 -0600 Subject: [PATCH 19/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 770bce57d..953aece4c 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -136,7 +136,7 @@ jobs: # We would still want the separate flags as we may endup with data being # scattered on different CI machines. run: | - chmod -R 777 /shark-cache # Ensure full access + sudo chmod -R 777 /shark-cache # Ensure full access source ${VENV_DIR}/bin/activate pytest \ -v \ From bae7745c61f7c2bc65b52ac468b4fe600828bbd2 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 19:02:58 -0600 Subject: [PATCH 20/35] seeing if tests pass while removing delete line Signed-off-by: Elias Joseph --- sharktank/tests/models/llama/quark_parity_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 294f7196b..e3c1028dd 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -26,8 +26,8 @@ def test_compare_against_quark(self): Path(os.path.dirname(os.path.abspath(__file__))).parent.parent.parent.parent ) our_path = self.path_prefix / "ours_prefill.safetensors" - if os.path.exists(our_path): - os.remove(our_path) + #if os.path.exists(our_path): + # os.remove(our_path) mapping = dict() for i in range(32): From e031685e0c66df6bb3add70365aef955ca7d25df Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 22:18:52 -0600 Subject: [PATCH 21/35] attempted to fix prefills issue Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 4 ++-- sharktank/tests/models/llama/quark_parity_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 953aece4c..4b196c2bf 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -138,7 +138,7 @@ jobs: run: | sudo chmod -R 777 /shark-cache # Ensure full access source ${VENV_DIR}/bin/activate - pytest \ + sudo pytest \ -v \ --log-cli-level=info \ --with-clip-data \ @@ -151,7 +151,7 @@ jobs: sharktank/tests/models/vae/vae_test.py \ sharktank/tests/models/llama/quark_parity_test.py \ --durations=0 \ - --timeout=800 + --timeout=1000 # TODO: add back # --with-t5-data \ # when #888 is resolved diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index e3c1028dd..294f7196b 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -26,8 +26,8 @@ def test_compare_against_quark(self): Path(os.path.dirname(os.path.abspath(__file__))).parent.parent.parent.parent ) our_path = self.path_prefix / "ours_prefill.safetensors" - #if os.path.exists(our_path): - # os.remove(our_path) + if os.path.exists(our_path): + os.remove(our_path) mapping = dict() for i in range(32): From 7aaa179a65bd395a822c12ed0be0b0669096aa67 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 22:24:12 -0600 Subject: [PATCH 22/35] attempted to fix prefills issue Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 4b196c2bf..261d65350 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -137,8 +137,9 @@ jobs: # scattered on different CI machines. run: | sudo chmod -R 777 /shark-cache # Ensure full access + lsattr -d /shark-cache source ${VENV_DIR}/bin/activate - sudo pytest \ + pytest \ -v \ --log-cli-level=info \ --with-clip-data \ From a32102b7a142be3300dbb0e643583b80f733866d Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 22:27:10 -0600 Subject: [PATCH 23/35] attempted to fix prefills issue Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 261d65350..51c465760 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -137,7 +137,8 @@ jobs: # scattered on different CI machines. run: | sudo chmod -R 777 /shark-cache # Ensure full access - lsattr -d /shark-cache + ls -ld /shark-cache + sudo chown -R $USER:$USER /shark-cache source ${VENV_DIR}/bin/activate pytest \ -v \ From c45a397b302031399f6bb1bfc244c4f5fffb20a9 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Thu, 20 Feb 2025 23:54:22 -0600 Subject: [PATCH 24/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 51c465760..615a59f57 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -153,7 +153,7 @@ jobs: sharktank/tests/models/vae/vae_test.py \ sharktank/tests/models/llama/quark_parity_test.py \ --durations=0 \ - --timeout=1000 + --timeout=300 # TODO: add back # --with-t5-data \ # when #888 is resolved @@ -197,7 +197,7 @@ jobs: run: | pytest -v sharktank/ -m punet_quick \ --durations=0 \ - --timeout=600 + --timeout=900 # Depends on other jobs to provide an aggregate job status. # TODO(#584): move test_with_data and test_integration to a pkgci integration test workflow? From 89d3aef7739ee9db824ea5b2bd665c10deeced3d Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:10:28 -0600 Subject: [PATCH 25/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 615a59f57..8479ff59b 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -147,10 +147,10 @@ jobs: --with-flux-data \ --with-vae-data \ --with-quark-data \ - sharktank/tests/models/clip/clip_test.py \ - sharktank/tests/models/t5/t5_test.py \ - sharktank/tests/models/flux/flux_test.py \ - sharktank/tests/models/vae/vae_test.py \ +# sharktank/tests/models/clip/clip_test.py \ +# sharktank/tests/models/t5/t5_test.py \ +# sharktank/tests/models/flux/flux_test.py \ +# sharktank/tests/models/vae/vae_test.py \ sharktank/tests/models/llama/quark_parity_test.py \ --durations=0 \ --timeout=300 From 18af41d9b485a1a4af5a69121d0fbf8c6502662d Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:11:27 -0600 Subject: [PATCH 26/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 8479ff59b..d36d8e6dd 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -147,10 +147,6 @@ jobs: --with-flux-data \ --with-vae-data \ --with-quark-data \ -# sharktank/tests/models/clip/clip_test.py \ -# sharktank/tests/models/t5/t5_test.py \ -# sharktank/tests/models/flux/flux_test.py \ -# sharktank/tests/models/vae/vae_test.py \ sharktank/tests/models/llama/quark_parity_test.py \ --durations=0 \ --timeout=300 From e1102ca44d78a58824632faed419d6ff1e7f0189 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:17:03 -0600 Subject: [PATCH 27/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/tests/models/llama/quark_parity_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 294f7196b..35be0769d 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -25,7 +25,7 @@ def test_compare_against_quark(self): sharktank_dir = str( Path(os.path.dirname(os.path.abspath(__file__))).parent.parent.parent.parent ) - our_path = self.path_prefix / "ours_prefill.safetensors" + our_path = "ours_prefill.safetensors" if os.path.exists(our_path): os.remove(our_path) @@ -58,7 +58,7 @@ def test_compare_against_quark(self): "--fake-quant", "--attention-kernel=torch", "--activation-dtype=bfloat16", - f"--save_intermediates_path={self.path_prefix}/ours", + f"--save_intermediates_path=ours", "--use-hf", "--attention-dtype=bfloat16", "--skip-decode", From 475309f2c8f31b09853571ea2532f36c66cf6710 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:21:27 -0600 Subject: [PATCH 28/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/tests/models/llama/quark_parity_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 35be0769d..61914e913 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -25,7 +25,7 @@ def test_compare_against_quark(self): sharktank_dir = str( Path(os.path.dirname(os.path.abspath(__file__))).parent.parent.parent.parent ) - our_path = "ours_prefill.safetensors" + our_path = self.path_prefix / "ours_prefill.safetensors" if os.path.exists(our_path): os.remove(our_path) @@ -58,12 +58,13 @@ def test_compare_against_quark(self): "--fake-quant", "--attention-kernel=torch", "--activation-dtype=bfloat16", - f"--save_intermediates_path=ours", + f"--save_intermediates_path={self.path_prefix}/ours", "--use-hf", "--attention-dtype=bfloat16", "--skip-decode", "--block-seq-stride=16", ] + f = open("/shark-cache/quark_test/test.txt", "w+") command = subprocess.list2cmdline(command) proc = subprocess.run( command, shell=True, capture_output=True, cwd=sharktank_dir From fd7b7bd213099cf50eea7aa9e499e2e1ad0c5153 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:32:20 -0600 Subject: [PATCH 29/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/sharktank/examples/paged_llm_v1.py | 3 +++ sharktank/tests/models/llama/quark_parity_test.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py index 5d338bd74..e35b97c90 100644 --- a/sharktank/sharktank/examples/paged_llm_v1.py +++ b/sharktank/sharktank/examples/paged_llm_v1.py @@ -322,6 +322,9 @@ def main(): print(batch.detokenize()) if args.save_intermediates_path: + f_ = open("/shark-cache/quark_test/test2.txt", "w+") + f_.write(args.save_intermediates_path + "_prefill.safetensors") + f_.close() intermediates_saver.save_file( args.save_intermediates_path + "_prefill.safetensors" ) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 61914e913..294f7196b 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -64,7 +64,6 @@ def test_compare_against_quark(self): "--skip-decode", "--block-seq-stride=16", ] - f = open("/shark-cache/quark_test/test.txt", "w+") command = subprocess.list2cmdline(command) proc = subprocess.run( command, shell=True, capture_output=True, cwd=sharktank_dir From 976b2bad03c173ad2fa1a84be17322563ab6f238 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:35:37 -0600 Subject: [PATCH 30/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/sharktank/examples/paged_llm_v1.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py index e35b97c90..1ca5a487c 100644 --- a/sharktank/sharktank/examples/paged_llm_v1.py +++ b/sharktank/sharktank/examples/paged_llm_v1.py @@ -263,6 +263,10 @@ def pad_block_ids(self) -> torch.Tensor: def main(): from ..utils import cli + f_ = open("/shark-cache/quark_test/test0.txt", "w+") + f_.write("HERE") + f_.close() + parser = cli.create_parser() parser.add_argument("prompt", nargs="+", help="Prompt strings") parser.add_argument( @@ -321,6 +325,10 @@ def main(): batch.prefill() print(batch.detokenize()) + f_ = open("/shark-cache/quark_test/test1.txt", "w+") + f_.write(args.save_intermediates_path + "_prefill.safetensors") + f_.close() + if args.save_intermediates_path: f_ = open("/shark-cache/quark_test/test2.txt", "w+") f_.write(args.save_intermediates_path + "_prefill.safetensors") From 531d86f5acbeed11279f4a0b4dbecfa60e256cea Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:41:42 -0600 Subject: [PATCH 31/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/sharktank/examples/paged_llm_v1.py | 11 ----------- sharktank/tests/models/llama/quark_parity_test.py | 5 +++++ 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py index 1ca5a487c..5d338bd74 100644 --- a/sharktank/sharktank/examples/paged_llm_v1.py +++ b/sharktank/sharktank/examples/paged_llm_v1.py @@ -263,10 +263,6 @@ def pad_block_ids(self) -> torch.Tensor: def main(): from ..utils import cli - f_ = open("/shark-cache/quark_test/test0.txt", "w+") - f_.write("HERE") - f_.close() - parser = cli.create_parser() parser.add_argument("prompt", nargs="+", help="Prompt strings") parser.add_argument( @@ -325,14 +321,7 @@ def main(): batch.prefill() print(batch.detokenize()) - f_ = open("/shark-cache/quark_test/test1.txt", "w+") - f_.write(args.save_intermediates_path + "_prefill.safetensors") - f_.close() - if args.save_intermediates_path: - f_ = open("/shark-cache/quark_test/test2.txt", "w+") - f_.write(args.save_intermediates_path + "_prefill.safetensors") - f_.close() intermediates_saver.save_file( args.save_intermediates_path + "_prefill.safetensors" ) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 294f7196b..6967efcc0 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -69,6 +69,11 @@ def test_compare_against_quark(self): command, shell=True, capture_output=True, cwd=sharktank_dir ) + f_ = open("/shark-cache/quark_test/test0.txt", "w+") + f_.write(str(proc)) + f_.close() + + ours = dict() with safe_open(our_path, "pytorch") as st: for key in st.keys(): From 2e70e719f573df28a29a4ceb514249f8c3d9b0aa Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:50:27 -0600 Subject: [PATCH 32/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- sharktank/tests/models/llama/quark_parity_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 6967efcc0..32738b4d0 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -54,7 +54,7 @@ def test_compare_against_quark(self): "sharktank.examples.paged_llm_v1", "The capitol of Texas is", f"--irpa-file={self.path_prefix}/fp8_bf16_weight.irpa", - f"--tokenizer-config-json=/data/llama3.1/8b/tokenizer.json", + f"--tokenizer-config-json=/shark-dev/data/llama3.1/weights/8b/tokenizer.json", "--fake-quant", "--attention-kernel=torch", "--activation-dtype=bfloat16", From b6a53d766ed436bd14c2a86355c2fee1852191cc Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 01:53:18 -0600 Subject: [PATCH 33/35] tried to fix path in sharktank Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index d36d8e6dd..ea3adecdd 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -147,9 +147,13 @@ jobs: --with-flux-data \ --with-vae-data \ --with-quark-data \ + sharktank/tests/models/clip/clip_test.py \ + sharktank/tests/models/t5/t5_test.py \ + sharktank/tests/models/flux/flux_test.py \ + sharktank/tests/models/vae/vae_test.py \ sharktank/tests/models/llama/quark_parity_test.py \ --durations=0 \ - --timeout=300 + --timeout=10000 # TODO: add back # --with-t5-data \ # when #888 is resolved From 9beaea892c4d934464cd88edda39f83f8b28319c Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 13:56:01 -0600 Subject: [PATCH 34/35] cleaned up pr Signed-off-by: Elias Joseph --- sharktank/conftest.py | 2 +- sharktank/tests/models/llama/quark_parity_test.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sharktank/conftest.py b/sharktank/conftest.py index 57f8e72f4..efa43e5f3 100644 --- a/sharktank/conftest.py +++ b/sharktank/conftest.py @@ -203,7 +203,7 @@ def pytest_addoption(parser): parser.addoption( "--google-t5-v1-1-xxl-f32-model-path", type=Path, - default="shark-dev/data/t5/xxl/google__t5-v1_1-xxl_f32.gguf", + default="/shark-dev/data/t5/xxl/google__t5-v1_1-xxl_f32.gguf", help="Google T5 v1.1 XXL float32 model path", ) parser.addoption( diff --git a/sharktank/tests/models/llama/quark_parity_test.py b/sharktank/tests/models/llama/quark_parity_test.py index 32738b4d0..2701507e3 100644 --- a/sharktank/tests/models/llama/quark_parity_test.py +++ b/sharktank/tests/models/llama/quark_parity_test.py @@ -73,7 +73,6 @@ def test_compare_against_quark(self): f_.write(str(proc)) f_.close() - ours = dict() with safe_open(our_path, "pytorch") as st: for key in st.keys(): From d403e53645072480a6dace209467c0055552d547 Mon Sep 17 00:00:00 2001 From: Elias Joseph Date: Fri, 21 Feb 2025 13:57:25 -0600 Subject: [PATCH 35/35] cleaned up pr Signed-off-by: Elias Joseph --- .github/workflows/ci-sharktank.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index ea3adecdd..76722642a 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -136,9 +136,6 @@ jobs: # We would still want the separate flags as we may endup with data being # scattered on different CI machines. run: | - sudo chmod -R 777 /shark-cache # Ensure full access - ls -ld /shark-cache - sudo chown -R $USER:$USER /shark-cache source ${VENV_DIR}/bin/activate pytest \ -v \