Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[None][test] Filter out all fp8 test cases for A100. (#8420)
Signed-off-by: yufeiwu <230315618+yufeiwu-nv@users.noreply.github.com>
This commit is contained in:
parent 70a0f5beb6
commit 1e1f430163
@@ -27,11 +27,31 @@ llm_perf_nim:
  # cpp
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-maxnt:5000-input_output_len:5000,500-con:250]
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-con:250]
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-con:250]
  # reduced 'reqs' to fit timeout limit
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-reqs:8-con:1]

# FP8 specific tests
- condition:
    terms:
      supports_fp8: true
    ranges:
      system_gpu_count:
        gte: 1
    wildcards:
      gpu:
      - '*h100*'
      - '*h200*'
      - '*l40s*'
      - '*l20*'
      - '*h20*'
  tests:
  # Phi-4-mini-instruct
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-con:250]
  # reduced 'reqs' to fit timeout limit
  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-reqs:8-con:1]

- condition:
    ranges:
      system_gpu_count:
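The new condition block above gates the FP8 cases to machines that can actually run them: terms entries must match a machine property exactly, ranges bound numeric properties such as the GPU count, and wildcards glob-match the GPU name against FP8-capable parts, which is what keeps these tests off A100. A minimal Python sketch of that selection logic, assuming hypothetical machine dictionaries and a made-up matches_condition helper rather than the actual TensorRT-LLM test runner:

from fnmatch import fnmatch

# Hypothetical evaluator for a test-list condition block; the names and
# machine layout are assumptions for illustration, not the real runner.
def matches_condition(condition: dict, machine: dict) -> bool:
    # terms: each key must equal the machine's value exactly.
    for key, expected in condition.get("terms", {}).items():
        if machine.get(key) != expected:
            return False
    # ranges: numeric bounds, e.g. system_gpu_count: {gte: 1}.
    for key, bounds in condition.get("ranges", {}).items():
        value = machine.get(key, 0)
        if "gte" in bounds and value < bounds["gte"]:
            return False
        if "lte" in bounds and value > bounds["lte"]:
            return False
    # wildcards: at least one glob pattern must match the machine value.
    for key, patterns in condition.get("wildcards", {}).items():
        candidate = str(machine.get(key, "")).lower()
        if not any(fnmatch(candidate, pattern) for pattern in patterns):
            return False
    return True

fp8_condition = {
    "terms": {"supports_fp8": True},
    "ranges": {"system_gpu_count": {"gte": 1}},
    "wildcards": {"gpu": ["*h100*", "*h200*", "*l40s*", "*l20*", "*h20*"]},
}
h100_box = {"supports_fp8": True, "system_gpu_count": 8, "gpu": "NVIDIA-H100-SXM"}
a100_box = {"supports_fp8": False, "system_gpu_count": 8, "gpu": "NVIDIA-A100-SXM"}
assert matches_condition(fp8_condition, h100_box)      # FP8-capable: selected
assert not matches_condition(fp8_condition, a100_box)  # A100: filtered out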
@@ -190,7 +210,6 @@ llm_perf_nim:
  #llama_v3.2_1b
  #trt backend
  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:512,32-quant:fp8-gpus:2]
  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:512,200-quant:fp8-gpus:2]
  #mistral_7b_v0.1
@@ -199,6 +218,7 @@ llm_perf_nim:
  - perf/test_perf.py::test_perf[mistral_7b_v0.1-bench-float16-input_output_len:500,2000-quant:fp8-tp:2]
  # torch backend
  - perf/test_perf.py::test_perf[mistral_7b_v0.1-bench-pytorch-float16-input_output_len:128,128]
  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:2]
  #phi_3_mini_128k_instruct
  #trt backend
  - perf/test_perf.py::test_perf[phi_3_mini_128k_instruct-bench-float16-maxbs:128-input_output_len:1000,1000-quant:fp8-tp:2]
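Each bracketed test ID in these lists packs the benchmark configuration into one dash-separated string: leading positional segments (model, bench mode, dtype) followed by key:value options such as maxbs (max batch size), maxnt (max token count), input_output_len, quant, reqs, con (concurrency), tp (tensor-parallel degree), and gpus. Those meanings are inferred from the names in this diff rather than from test_perf.py itself; a small sketch of decoding one ID under that assumption:

# Illustrative decoder for the perf test IDs in this list; the segment
# layout is inferred from the IDs above, not taken from test_perf.py.
def parse_perf_id(test_id: str) -> dict:
    params: dict = {"flags": []}
    for segment in test_id.split("-"):
        if ":" in segment:                     # key:value option
            key, value = segment.split(":", 1)
            params[key] = value
        else:                                  # positional flag
            params["flags"].append(segment)
    return params

parsed = parse_perf_id(
    "phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-con:250"
)
assert parsed["flags"] == ["phi_4_mini_instruct", "bench", "bfloat16"]
assert parsed["quant"] == "fp8" and parsed["con"] == "250"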