tests: [TRTQA-2906] add benchmark serving tests (#4901)

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
Co-authored-by: Larry <197874197+LarryXFly@users.noreply.github.com>
xinhe-nv 2025-06-05 14:33:03 +08:00 committed by GitHub
parent ddbaa5ef80
commit 1c3091c63b
3 changed files with 75 additions and 0 deletions


@@ -1433,6 +1433,20 @@ def test_openai_multinodes_chat_tp8pp2(llm_root, llm_venv):
    ])


@pytest.mark.skip_less_device_memory(80000)
def test_trtllm_benchmark_serving(llm_root, llm_venv):
    example_root = Path(os.path.join(llm_root, "examples", "apps"))
    test_root = unittest_path() / "llmapi" / "apps"
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])
    llm_venv.run_cmd(
        ["-m", "pytest",
         str(test_root / "_test_trtllm_serve_benchmark.py")])


def test_build_time_benchmark_sanity(llm_root, llm_venv):
    temp = tempfile.TemporaryDirectory()
    llm_venv.run_cmd([
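For context, the new wrapper test installs the apps example requirements into the test venv and then runs the standalone serving-benchmark module through pytest. A minimal sketch of the equivalent manual invocation is shown below; the repo-root-relative paths and the tests/unittest location are assumptions based on this diff, not confirmed by it.

# Sketch: run the new serving-benchmark test outside the llm_venv harness.
# Paths are assumptions mirroring the diff (run from the repository root).
import subprocess
import sys

# Install the dependencies the wrapper installs via llm_venv.run_cmd.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r",
     "examples/apps/requirements.txt"],
    check=True)

# Invoke the standalone benchmark test module directly with pytest.
subprocess.run(
    [sys.executable, "-m", "pytest",
     "tests/unittest/llmapi/apps/_test_trtllm_serve_benchmark.py"],
    check=True)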


@@ -490,6 +490,7 @@ test_e2e.py::test_mistral_e2e[use_py_session---]
test_e2e.py::test_qwen_e2e_cpprunner_large_new_tokens[DeepSeek-R1-Distill-Qwen-1.5B-DeepSeek-R1-Distill-Qwen-1.5B]
test_e2e.py::test_openai_multi_chat_example
test_e2e.py::test_openai_consistent_chat
test_e2e.py::test_trtllm_benchmark_serving
llmapi/test_llm_examples.py::test_llmapi_server_example
# Pivot to Pytorch test cases.
test_e2e.py::test_ptp_quickstart


@@ -0,0 +1,60 @@
import os
import subprocess
import sys

import pytest
from utils.util import skip_gpu_memory_less_than_80gb

from .openai_server import RemoteOpenAIServer

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from test_llm import get_model_path


@pytest.fixture(scope="module")
def model_name():
    return "llama-3.1-model/Meta-Llama-3.1-8B"


@pytest.fixture(scope="module")
def model_path(model_name: str):
    return get_model_path(model_name)


@pytest.fixture(scope="module")
def server(model_path: str):
    # fix port to facilitate concise trtllm-serve examples
    with RemoteOpenAIServer(model_path, port=8000) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def benchmark_root():
    llm_root = os.getenv("LLM_ROOT")
    return os.path.join(llm_root, "tensorrt_llm", "serve", "scripts")


def dataset_path(dataset_name: str):
    if dataset_name == "sharegpt":
        return get_model_path(
            "datasets/ShareGPT_V3_unfiltered_cleaned_split.json")
    else:
        raise ValueError(f"Invalid dataset name: {dataset_name}")


@skip_gpu_memory_less_than_80gb
def test_trtllm_serve_benchmark(server: RemoteOpenAIServer, benchmark_root: str,
                                model_path: str):
    client_script = os.path.join(benchmark_root, "benchmark_serving.py")
    dataset = dataset_path("sharegpt")
    benchmark_cmd = [
        "python3", client_script, "--dataset-name", "sharegpt", "--model",
        "llama", "--dataset-path", dataset, "--tokenizer", model_path
    ]

    # CalledProcessError will be raised if any errors occur
    subprocess.run(benchmark_cmd,
                   stdout=subprocess.PIPE,
                   stderr=subprocess.PIPE,
                   text=True,
                   check=True)
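
For reference, the command assembled in test_trtllm_serve_benchmark amounts to running the benchmark client directly against a trtllm-serve endpoint that is already listening on the port pinned by the server fixture. A hedged sketch of that manual run follows; LLM_ROOT and the placeholder dataset/tokenizer paths are assumptions that depend on the local environment, and only the flags used by the test itself are passed.

# Sketch only: drive benchmark_serving.py against a server already running on
# localhost:8000. LLM_ROOT and the placeholder paths below are assumptions.
import os
import subprocess

llm_root = os.environ["LLM_ROOT"]
script = os.path.join(llm_root, "tensorrt_llm", "serve", "scripts",
                      "benchmark_serving.py")
subprocess.run([
    "python3", script,
    "--dataset-name", "sharegpt",
    "--model", "llama",
    "--dataset-path", "/path/to/ShareGPT_V3_unfiltered_cleaned_split.json",
    "--tokenizer", "/path/to/Meta-Llama-3.1-8B",
], check=True)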