[https://nvbugs/5601682][fix] unwaive test_disaggregated_deepseek_v3_… (#8888)

Signed-off-by: Bo Deng <deemod@nvidia.com>
This commit is contained in:
Bo Deng 2025-11-05 09:33:57 +08:00 committed by GitHub
parent 0206d8d0fc
commit 43843778a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 11 deletions

View File

@ -985,7 +985,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
@skip_pre_blackwell
@pytest.mark.timeout(DEFAULT_TEST_TIMEOUT)
@pytest.mark.timeout(3600)
class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
FP4_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf"
FP8_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf"

View File

@ -1289,8 +1289,7 @@ def run_disaggregated_benchmark(example_dir,
random_input_len=16,
random_output_len=64,
num_prompts=100,
max_concurrency=32,
skip_warmup=False):
max_concurrency=32):
"""Run disaggregated test with given configuration."""
run_env = env.copy()
run_env["UCX_TLS"] = "^ib"
@ -1320,7 +1319,7 @@ def run_disaggregated_benchmark(example_dir,
stderr=subprocess.STDOUT,
env=run_env,
cwd=cwd) as server_proc):
# Ensure the server has started
client_dir = f"{example_dir}/clients"
client_cmd = [
'python3', f'{client_dir}/disagg_client.py', '-c',
@ -1329,7 +1328,7 @@ def run_disaggregated_benchmark(example_dir,
'--server-start-timeout',
str(server_start_timeout)
]
# Warm up
# Ensure the server has started and workers are ready
check_call(client_cmd,
env=env,
poll_procs=[workers_proc, server_proc])
@ -1366,9 +1365,6 @@ def run_disaggregated_benchmark(example_dir,
'--percentile-metrics',
'e2el,ttft',
]
# warm up
if not skip_warmup:
check_call(benchmark_cmd, env=env)
output = check_output(benchmark_cmd, env=env)
e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)"
ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)"
@ -1513,8 +1509,7 @@ def test_disaggregated_deepseek_v3_lite_bf16_empty_batch(
num_prompts=10,
max_concurrency=10,
random_input_len=384,
random_output_len=1536,
skip_warmup=True)
random_output_len=1536)
print(f"E2EL: {e2el} ms, TTFT: {ttft} ms")
assert e2el > 0 and ttft > 0

View File

@ -337,7 +337,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[4-0-disableDecoupleMode-te
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670)
disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682)
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574)
full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)
full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O)