Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
xinhe-nv 2026-02-02 15:32:05 +08:00 committed by Yanchao Lu
parent d8e7c61ea9
commit 80708ba231


@@ -666,11 +666,8 @@ def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name,
                          ids=["llama3_1-8b"])
 @pytest.mark.parametrize("model_subdir", ["llama-3.1-model/Meta-Llama-3.1-8B"],
                          ids=["llama_v3_1"])
-@pytest.mark.parametrize("use_pytorch_backend", [False], ids=["trt_backend"])
-def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
-                                 use_pytorch_backend):
+def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir):
     "run bench mark in MIG mode, check if the throughput is increasing by concurrency"
-    skip_engine_build = False
     results = {}
     concurrency_list = [1, 32, 64, 128]
@@ -681,19 +678,17 @@ def test_trtllm_bench_mig_launch(llm_root, llm_venv, model_name, model_subdir,
             model_name=model_name,
             model_subdir=model_subdir,
             streaming=False,
-            use_pytorch_backend=use_pytorch_backend,
+            use_pytorch_backend=True,
             use_mpirun=False,
             tp_size=1,
             concurrency=concurrency,
-            num_requests=num_requests,
-            skip_engine_build=skip_engine_build)
+            num_requests=num_requests)
         output = runner()
         results[concurrency] = output
     print(f"\n=== Benchmark Results Comparison ===")
     print(f"Model: {model_name}")
-    print(f"Backend: {'PyTorch' if use_pytorch_backend else 'TensorRT'}")
     print(
         f"{'Concurrency':<15} {'Throughput':<15} {'Latency':<15} {'Num Requests':<15}"
     )
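
To make the net effect easier to read across the two hunks, here is a minimal, self-contained sketch of the benchmark loop after this change. BenchRunner, run_mig_bench, the default argument values, and the result dictionary shape are hypothetical stand-ins for code not visible in this diff; only the keyword arguments taken from the hunks (use_pytorch_backend hard-coded to True, skip_engine_build no longer passed) reflect the actual change.

# Hypothetical sketch; names and values below are stand-ins, not the repo's real helpers.
class BenchRunner:
    """Stub runner; the real test drives a trtllm-bench invocation instead."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def __call__(self):
        # Fake a throughput that grows sub-linearly with concurrency.
        c = self.kwargs["concurrency"]
        return {"throughput": 100.0 * c**0.8, "latency": 5.0 + 0.1 * c}


def run_mig_bench(model_name="llama3_1-8b",  # assumption: real test parametrizes the model
                  model_subdir="llama-3.1-model/Meta-Llama-3.1-8B",
                  num_requests=256):  # assumption: num_requests value is not shown in the diff
    results = {}
    concurrency_list = [1, 32, 64, 128]
    for concurrency in concurrency_list:
        runner = BenchRunner(
            model_name=model_name,
            model_subdir=model_subdir,
            streaming=False,
            use_pytorch_backend=True,  # hard-coded now; the trt_backend parametrize was removed
            use_mpirun=False,
            tp_size=1,
            concurrency=concurrency,
            num_requests=num_requests)  # skip_engine_build is no longer passed
        results[concurrency] = runner()

    # The docstring asks that throughput not drop as concurrency grows.
    throughputs = [results[c]["throughput"] for c in concurrency_list]
    assert all(a <= b for a, b in zip(throughputs, throughputs[1:])), throughputs
    return results


if __name__ == "__main__":
    for c, r in run_mig_bench().items():
        print(f"{c:<15} {r['throughput']:<15.1f} {r['latency']:<15.2f}")

The closing assertion mirrors the docstring's intent ("check if the throughput is increasing by concurrency"); the real test may compare the per-concurrency results differently.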