From 80708ba231a4b74587604e70ed964c79a077e286 Mon Sep 17 00:00:00 2001 From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Date: Mon, 2 Feb 2026 15:32:05 +0800 Subject: [PATCH] [https://nvbugs/5787904][fix] update mig tests (#11014) Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- tests/integration/defs/test_e2e.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 918f135c21..6c9f924066 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -666,11 +666,8 @@ def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name, ids=["llama3_1-8b"]) @pytest.mark.parametrize("model_subdir", ["llama-3.1-model/Meta-Llama-3.1-8B"], ids=["llama_v3_1"]) -@pytest.mark.parametrize("use_pytorch_backend", [False], ids=["trt_backend"]) -def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir, - use_pytorch_backend): +def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir): "run bench mark in MIG mode, check if the throughput is increasing by concurrency" - skip_engine_build = False results = {} concurrency_list = [1, 32, 64, 128] @@ -681,19 +678,17 @@ def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir, model_name=model_name, model_subdir=model_subdir, streaming=False, - use_pytorch_backend=use_pytorch_backend, + use_pytorch_backend=True, use_mpirun=False, tp_size=1, concurrency=concurrency, - num_requests=num_requests, - skip_engine_build=skip_engine_build) + num_requests=num_requests) output = runner() results[concurrency] = output print(f"\n=== Benchmark Results Comparison ===") print(f"Model: {model_name}") - print(f"Backend: {'PyTorch' if use_pytorch_backend else 'TensorRT'}") print( f"{'Concurrency':<15} {'Throughput':<15} {'Latency':<15} {'Num Requests':<15}" )