[https://nvbugs/5601682][fix] unwaive test_disaggregated_deepseek_v3_… (#8888)

Signed-off-by: Bo Deng <deemod@nvidia.com>
2026-01-14 06:27:45 +08:00 · 2025-11-05 09:33:57 +08:00 · 2025-11-05 09:33:57 +08:00 · 43843778a7
commit 43843778a7
parent 0206d8d0fc
3 changed files with 5 additions and 11 deletions
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@ -985,7 +985,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):


@skip_pre_blackwell
-@pytest.mark.timeout(DEFAULT_TEST_TIMEOUT)
+@pytest.mark.timeout(3600)
 class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
    FP4_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf"
    FP8_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf"
--- a/tests/integration/defs/disaggregated/test_disaggregated.py
+++ b/tests/integration/defs/disaggregated/test_disaggregated.py
@ -1289,8 +1289,7 @@ def run_disaggregated_benchmark(example_dir,
                                random_input_len=16,
                                random_output_len=64,
                                num_prompts=100,
-                                max_concurrency=32,
-                                skip_warmup=False):
+                                max_concurrency=32):
    """Run disaggregated test with given configuration."""
    run_env = env.copy()
    run_env["UCX_TLS"] = "^ib"
@ -1320,7 +1319,7 @@ def run_disaggregated_benchmark(example_dir,
                      stderr=subprocess.STDOUT,
                      env=run_env,
                      cwd=cwd) as server_proc):
-            # Ensure the sever has started
+
            client_dir = f"{example_dir}/clients"
            client_cmd = [
                'python3', f'{client_dir}/disagg_client.py', '-c',
@ -1329,7 +1328,7 @@ def run_disaggregated_benchmark(example_dir,
                '--server-start-timeout',
                str(server_start_timeout)
            ]
-            # Warm up
+            # Ensure the server has started and workers are ready
            check_call(client_cmd,
                       env=env,
                       poll_procs=[workers_proc, server_proc])
@ -1366,9 +1365,6 @@ def run_disaggregated_benchmark(example_dir,
                '--percentile-metrics',
                'e2el,ttft',
            ]
-            # warm up
-            if not skip_warmup:
-                check_call(benchmark_cmd, env=env)
            output = check_output(benchmark_cmd, env=env)
            e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)"
            ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)"
@ -1513,8 +1509,7 @@ def test_disaggregated_deepseek_v3_lite_bf16_empty_batch(
        num_prompts=10,
        max_concurrency=10,
        random_input_len=384,
-        random_output_len=1536,
-        skip_warmup=True)
+        random_output_len=1536)
    print(f"E2EL: {e2el} ms, TTFT: {ttft} ms")

    assert e2el > 0 and ttft > 0
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@ -337,7 +337,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[4-0-disableDecoupleMode-te
 triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
 triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
 cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670)
-disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682)
 disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574)
 full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)
 full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O)