Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
xinhe-nv 2026-02-02 15:32:05 +08:00 committed by Yanchao Lu
parent d8e7c61ea9
commit 80708ba231


@@ -666,11 +666,8 @@ def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name,
                          ids=["llama3_1-8b"])
 @pytest.mark.parametrize("model_subdir", ["llama-3.1-model/Meta-Llama-3.1-8B"],
                          ids=["llama_v3_1"])
-@pytest.mark.parametrize("use_pytorch_backend", [False], ids=["trt_backend"])
-def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
-                                 use_pytorch_backend):
+def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir):
     "run bench mark in MIG mode, check if the throughput is increasing by concurrency"
-    skip_engine_build = False
     results = {}
     concurrency_list = [1, 32, 64, 128]
@@ -681,19 +678,17 @@ def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
             model_name=model_name,
             model_subdir=model_subdir,
             streaming=False,
-            use_pytorch_backend=use_pytorch_backend,
+            use_pytorch_backend=True,
             use_mpirun=False,
             tp_size=1,
             concurrency=concurrency,
-            num_requests=num_requests,
-            skip_engine_build=skip_engine_build)
+            num_requests=num_requests)
         output = runner()
         results[concurrency] = output
     print(f"\n=== Benchmark Results Comparison ===")
     print(f"Model: {model_name}")
-    print(f"Backend: {'PyTorch' if use_pytorch_backend else 'TensorRT'}")
     print(
         f"{'Concurrency':<15} {'Throughput':<15} {'Latency':<15} {'Num Requests':<15}"
     )
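
To make the net effect easier to read across the two hunks, here is a minimal, self-contained sketch of the benchmark loop after this change. BenchRunner, run_mig_bench, the default argument values, and the result dictionary shape are hypothetical stand-ins for code not visible in this diff; only the keyword arguments taken from the hunks (use_pytorch_backend hard-coded to True, skip_engine_build no longer passed) reflect the actual change.

# Hypothetical sketch; names and values below are stand-ins, not the repo's real helpers.
class BenchRunner:
    """Stub runner; the real test drives a trtllm-bench invocation instead."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def __call__(self):
        # Fake a throughput that grows sub-linearly with concurrency.
        c = self.kwargs["concurrency"]
        return {"throughput": 100.0 * c**0.8, "latency": 5.0 + 0.1 * c}


def run_mig_bench(model_name="llama3_1-8b",  # assumption: real test parametrizes the model
                  model_subdir="llama-3.1-model/Meta-Llama-3.1-8B",
                  num_requests=256):  # assumption: num_requests value is not shown in the diff
    results = {}
    concurrency_list = [1, 32, 64, 128]
    for concurrency in concurrency_list:
        runner = BenchRunner(
            model_name=model_name,
            model_subdir=model_subdir,
            streaming=False,
            use_pytorch_backend=True,  # hard-coded now; the trt_backend parametrize was removed
            use_mpirun=False,
            tp_size=1,
            concurrency=concurrency,
            num_requests=num_requests)  # skip_engine_build is no longer passed
        results[concurrency] = runner()

    # The docstring asks that throughput not drop as concurrency grows.
    throughputs = [results[c]["throughput"] for c in concurrency_list]
    assert all(a <= b for a, b in zip(throughputs, throughputs[1:])), throughputs
    return results


if __name__ == "__main__":
    for c, r in run_mig_bench().items():
        print(f"{c:<15} {r['throughput']:<15.1f} {r['latency']:<15.2f}")

The closing assertion mirrors the docstring's intent ("check if the throughput is increasing by concurrency"); the real test may compare the per-concurrency results differently.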