mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][test] rename llm_perf_full to llm_perf_core and add missing cases (#7899)
Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com> Co-authored-by: Ruodi Lu <ruodil@users.noreply.github.com>
This commit is contained in:
parent
a4b4ed4535
commit
05bec3bf0f
@ -1,5 +1,5 @@
|
||||
version: 0.0.1
|
||||
llm_perf_full:
|
||||
llm_perf_core:
|
||||
# one gpu test
|
||||
- condition:
|
||||
ranges:
|
||||
@ -74,6 +74,8 @@ llm_perf_full:
|
||||
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:2000,500]
|
||||
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:2000,500]
|
||||
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:500,2000]
|
||||
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:5000,500-reqs:200]
|
||||
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:128,128]
|
||||
- perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:1-maxnt:20000-input_output_len:20000,2000-reqs:8-con:1] TIMEOUT(120)
|
||||
- perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:4096-maxnt:20000-input_output_len:20000,2000-reqs:500-con:200] TIMEOUT(120)
|
||||
|
||||
@ -382,6 +384,7 @@ llm_perf_full:
|
||||
#llama_v3.1_70b
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-input_output_len:128,128-tp:2-gpus:2]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-kv_cache_dtype:fp8]
|
||||
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-tp:2-gpus:2]
|
||||
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp4-bench-pytorch-float4-input_output_len:512,32-kv_cache_dtype:fp8-tp:2-gpus:2]
|
||||
@ -308,6 +308,7 @@ llm_perf_nim:
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-bfloat16-input_output_len:200,2000-reqs:64-con:200-gpus:8]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-bfloat16-input_output_len:200,2000-reqs:8-con:1-gpus:8] # timeout for h20, move to l2 test
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-maxnt:544-input_output_len:512,32-quant:fp8-gpus:8]
|
||||
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-bfloat16-input_output_len:128,128-gpus:8]
|
||||
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-bfloat16-maxbs:16-maxnt:5000-input_output_len:5000,500-reqs:64-con:250-gpus:8]
|
||||
|
||||
|
||||
@ -1,2 +0,0 @@
|
||||
perf/test_perf.py::test_perf[llama_v3.1_70b-cppmanager-exe-plugin_ifb-float16-input_output_len:1024,1024-quant:fp8-tp:8-pp:2]
|
||||
perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-cppmanager-exe-plugin_ifb-float16-input_output_len:512,512-quant:fp8-tp:8-pp:2]
|
||||
Loading…
Reference in New Issue
Block a user