Unwaive disaggregated serving accuracy tests (#5095)

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Signed-off-by: Iman Tabrizian <10105175+Tabrizian@users.noreply.github.com>
This commit is contained in:
Iman Tabrizian 2025-06-18 12:41:15 -04:00 committed by GitHub
parent 857108aeca
commit e5ee5c5352
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 14 additions and 23 deletions

View File

@ -58,7 +58,7 @@ class MyThreadPoolExecutor(ThreadPoolExecutor):
for future in self.futures:
future.cancel()
self.shutdown(wait=False, cancel_futures=True)
self.shutdown(wait=True, cancel_futures=True)
return False
@ -163,15 +163,16 @@ def launch_disaggregated_llm(disaggregated_server_config: Dict[str, Any],
thread_pool.futures.append(future)
return future
yield DuckLLM(args, generate_async)
try:
yield DuckLLM(args, generate_async)
finally:
ctx_server.terminate()
gen_server.terminate()
disaggregated_server.terminate()
ctx_server.terminate()
gen_server.terminate()
disaggregated_server.terminate()
ctx_server.wait()
gen_server.wait()
disaggregated_server.wait()
ctx_server.wait()
gen_server.wait()
disaggregated_server.wait()
@pytest.mark.timeout(3600)
@ -252,16 +253,8 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
@parametrize_with_ids("mtp_nextn",
[0, pytest.param(2, marks=skip_pre_hopper)])
def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
ctx_server_config = {
"pytorch_backend_config": {
"disable_overlap_scheduler": True
}
}
gen_server_config = {
"pytorch_backend_config": {
"disable_overlap_scheduler": not overlap_scheduler
}
}
ctx_server_config = {"disable_overlap_scheduler": True}
gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
if mtp_nextn > 0:
ctx_server_config["speculative_config"] = {
"decoding_type": "MTP",

View File

@ -402,10 +402,6 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama
test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct] SKIP (https://nvbugspro.nvidia.com/bug/5324239)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5318143)
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=True] SKIP (https://nvbugs/5318143)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5322354)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=False] SKIP (https://nvbugs/5322354)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-H-8B-Nemotron-H-8B-Base-8K] SKIP (https://nvbugs/5325284)
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B] SKIP (https://nvbugs/5323316)
disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5328160)
@ -416,6 +412,8 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659)
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659)
examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321)
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5336321)
full:B200/examples/test_gemma.py::test_llm_gemma_1gpu_summary_vswa[gemma-3-1b-it-other-bfloat16-8] SKIP (https://nvbugs/5292737)