From 80708ba231a4b74587604e70ed964c79a077e286 Mon Sep 17 00:00:00 2001
From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
Date: Mon, 2 Feb 2026 15:32:05 +0800
Subject: [PATCH] [https://nvbugs/5787904][fix] update mig tests (#11014)

Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
---
 tests/integration/defs/test_e2e.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 918f135c21..6c9f924066 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -666,11 +666,8 @@ def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name,
                          ids=["llama3_1-8b"])
 @pytest.mark.parametrize("model_subdir", ["llama-3.1-model/Meta-Llama-3.1-8B"],
                          ids=["llama_v3_1"])
-@pytest.mark.parametrize("use_pytorch_backend", [False], ids=["trt_backend"])
-def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
-                                 use_pytorch_backend):
+def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir):
     "run bench mark in MIG mode, check if the throughput is increasing by concurrency"
-    skip_engine_build = False
     results = {}
     concurrency_list = [1, 32, 64, 128]
 
@@ -681,19 +678,17 @@ def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
                              model_name=model_name,
                              model_subdir=model_subdir,
                              streaming=False,
-                             use_pytorch_backend=use_pytorch_backend,
+                             use_pytorch_backend=True,
                              use_mpirun=False,
                              tp_size=1,
                              concurrency=concurrency,
-                             num_requests=num_requests,
-                             skip_engine_build=skip_engine_build)
+                             num_requests=num_requests)
 
         output = runner()
         results[concurrency] = output
 
     print(f"\n=== Benchmark Results Comparison ===")
     print(f"Model: {model_name}")
-    print(f"Backend: {'PyTorch' if use_pytorch_backend else 'TensorRT'}")
     print(
         f"{'Concurrency':<15} {'Throughput':<15} {'Latency':<15} {'Num Requests':<15}"
     )