feat: Add stress test for TRT-LLM (#3250)

Signed-off-by: Wangshanshan <dominicw@nvidia.com>
This commit is contained in:
dominicshanshan 2025-04-13 10:24:25 +08:00 committed by GitHub
parent 74850c61e9
commit 5d3180be82
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 1041 additions and 0 deletions

View File

@@ -31,3 +31,4 @@ pytest-rerunfailures
ruff==0.9.4
lm_eval[api]==0.4.8
docstring_parser
genai-perf

File diff suppressed because it is too large Load Diff

View File

@@ -18,6 +18,8 @@ l0_a10:
- disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-pytorch-stress-test]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-pytorch-stress-stage-alone]
- condition:
ranges:
system_gpu_count:
@@ -108,6 +110,8 @@ l0_a10:
- examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]
- examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]
- examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] # 3 mins
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-trt-stress-test]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-trt-stress-stage-alone]
- condition:
ranges:
system_gpu_count: