feat: Add stress test for TRT-LLM (#3250)

Signed-off-by: Wangshanshan <dominicw@nvidia.com>

Parent: 74850c61e9
Commit: 5d3180be82
@@ -31,3 +31,4 @@ pytest-rerunfailures
 ruff==0.9.4
 lm_eval[api]==0.4.8
 docstring_parser
+genai-perf
tests/integration/defs/stress_test/stress_test.py (new file, 1036 lines)
File diff suppressed because it is too large
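The new 1,036-line test module is collapsed in this view. As a rough orientation only, the sketch below shows how a parametrized pytest entry point matching the test IDs referenced in the l0_a10 test lists might be shaped; the `StressTestConfig` dataclass, the parameter names, and the test body are assumptions for illustration, not the contents of the actual file.

```python
# Minimal illustrative sketch -- the real stress_test.py is suppressed in this
# diff. Names below are inferred from the test IDs in the l0_a10 test lists
# (model_tp, scheduler policy, backend, stress mode) and are not authoritative.
from dataclasses import dataclass

import pytest


@dataclass
class StressTestConfig:
    model: str                       # e.g. "llama-v3-8b-instruct-hf"
    tp_size: int                     # tensor-parallel size ("tp1" in the ID)
    capacity_scheduler_policy: str   # "MAX_UTILIZATION" or "GUARANTEED_NO_EVICT"
    backend: str                     # "pytorch" or "trt"
    mode: str                        # "stress-test" or "stress-stage-alone"


@pytest.mark.parametrize("backend", ["pytorch", "trt"])
@pytest.mark.parametrize("policy", ["MAX_UTILIZATION", "GUARANTEED_NO_EVICT"])
@pytest.mark.parametrize("mode", ["stress-test", "stress-stage-alone"])
def test_run_stress_test(backend, policy, mode):
    # The test lists select specific combinations rather than this full
    # cross product; the parametrization here is only for illustration.
    config = StressTestConfig(
        model="llama-v3-8b-instruct-hf",
        tp_size=1,
        capacity_scheduler_policy=policy,
        backend=backend,
        mode=mode,
    )
    # A real implementation would launch the serving stack for `config`,
    # drive sustained load (e.g. with genai-perf, which this commit adds to
    # the dev requirements), and assert the run completes without crashes,
    # OOMs, or unacceptable latency/throughput degradation.
    ...
```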
@@ -18,6 +18,8 @@ l0_a10:
   - disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-pytorch-stress-test]
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-pytorch-stress-stage-alone]
 - condition:
     ranges:
       system_gpu_count:
@@ -108,6 +110,8 @@ l0_a10:
   - examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]
   - examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]
   - examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] # 3 mins
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-trt-stress-test]
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-trt-stress-stage-alone]
 - condition:
     ranges:
       system_gpu_count:
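The added entries follow pytest's node-ID convention, so a single variant can be run on its own. A minimal sketch, assuming tests are invoked from tests/integration/defs and that the usual TRT-LLM test environment (model checkpoints, GPUs) is available; neither the working directory nor the environment setup is shown in this diff.

```python
# Illustrative only: run one newly listed stress-test variant by its pytest
# node ID. Assumes the current working directory is tests/integration/defs.
import pytest

node_id = (
    "stress_test/stress_test.py::test_run_stress_test"
    "[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-pytorch-stress-test]"
)

# "-s" streams server/client output, which helps when watching a long stress run.
exit_code = pytest.main(["-s", node_id])
raise SystemExit(exit_code)
```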