Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
[https://nvbugs/5601682][fix] unwaive test_disaggregated_deepseek_v3_lite_bf16_empty_batch (#8888)
Signed-off-by: Bo Deng <deemod@nvidia.com>
This commit is contained in:
parent 0206d8d0fc
commit 43843778a7
@ -985,7 +985,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
|
||||
|
||||
|
||||
@skip_pre_blackwell
|
||||
@pytest.mark.timeout(DEFAULT_TEST_TIMEOUT)
|
||||
@pytest.mark.timeout(3600)
|
||||
class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
|
||||
FP4_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf"
|
||||
FP8_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf"
|
||||
|
||||
@ -1289,8 +1289,7 @@ def run_disaggregated_benchmark(example_dir,
|
||||
random_input_len=16,
|
||||
random_output_len=64,
|
||||
num_prompts=100,
|
||||
max_concurrency=32,
|
||||
skip_warmup=False):
|
||||
max_concurrency=32):
|
||||
"""Run disaggregated test with given configuration."""
|
||||
run_env = env.copy()
|
||||
run_env["UCX_TLS"] = "^ib"
|
||||
@ -1320,7 +1319,7 @@ def run_disaggregated_benchmark(example_dir,
|
||||
stderr=subprocess.STDOUT,
|
||||
env=run_env,
|
||||
cwd=cwd) as server_proc):
|
||||
# Ensure the server has started
|
||||
|
||||
client_dir = f"{example_dir}/clients"
|
||||
client_cmd = [
|
||||
'python3', f'{client_dir}/disagg_client.py', '-c',
|
||||
@ -1329,7 +1328,7 @@ def run_disaggregated_benchmark(example_dir,
|
||||
'--server-start-timeout',
|
||||
str(server_start_timeout)
|
||||
]
|
||||
# Warm up
|
||||
# Ensure the server has started and workers are ready
|
||||
check_call(client_cmd,
|
||||
env=env,
|
||||
poll_procs=[workers_proc, server_proc])
|
||||
@ -1366,9 +1365,6 @@ def run_disaggregated_benchmark(example_dir,
|
||||
'--percentile-metrics',
|
||||
'e2el,ttft',
|
||||
]
|
||||
# warm up
|
||||
if not skip_warmup:
|
||||
check_call(benchmark_cmd, env=env)
|
||||
output = check_output(benchmark_cmd, env=env)
|
||||
e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)"
|
||||
ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)"
|
||||
@ -1513,8 +1509,7 @@ def test_disaggregated_deepseek_v3_lite_bf16_empty_batch(
|
||||
num_prompts=10,
|
||||
max_concurrency=10,
|
||||
random_input_len=384,
|
||||
random_output_len=1536,
|
||||
skip_warmup=True)
|
||||
random_output_len=1536)
|
||||
print(f"E2EL: {e2el} ms, TTFT: {ttft} ms")
|
||||
|
||||
assert e2el > 0 and ttft > 0
|
||||
|
||||
@ -337,7 +337,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[4-0-disableDecoupleMode-te
|
||||
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
|
||||
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
|
||||
cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574)
|
||||
full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)
|
||||
full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user