[None][test] Filter out all fp8 test case for A100. (#8420)

Signed-off-by: yufeiwu <230315618+yufeiwu-nv@users.noreply.github.com>
This commit is contained in:
yufeiwu-nv 2025-10-17 11:42:50 +08:00 committed by GitHub
parent 70a0f5beb6
commit 1e1f430163
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -27,11 +27,31 @@ llm_perf_nim:
# cpp
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-maxnt:5000-input_output_len:5000,500-con:250]
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-con:250]
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-con:250]
# reduced 'reqs' to fit timeout limit
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-reqs:8-con:1]
# FP8 specific tests
- condition:
terms:
supports_fp8: true
ranges:
system_gpu_count:
gte: 1
wildcards:
gpu:
- '*h100*'
- '*h200*'
- '*l40s*'
- '*l20*'
- '*h20*'
tests:
# Phi-4-mini-instruct
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-con:250]
# reduced 'reqs' to fit timeout limit
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-reqs:8-con:1]
- condition:
ranges:
system_gpu_count:
@@ -190,7 +210,6 @@ llm_perf_nim:
#llama_v3.2_1b
#trt backend
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:512,32-quant:fp8-gpus:2]
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:512,200-quant:fp8-gpus:2]
#mistral_7b_v0.1
@@ -199,6 +218,7 @@ llm_perf_nim:
- perf/test_perf.py::test_perf[mistral_7b_v0.1-bench-float16-input_output_len:500,2000-quant:fp8-tp:2]
# torch backend
- perf/test_perf.py::test_perf[mistral_7b_v0.1-bench-pytorch-float16-input_output_len:128,128]
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:2]
#phi_3_mini_128k_instruct
#trt backend
- perf/test_perf.py::test_perf[phi_3_mini_128k_instruct-bench-float16-maxbs:128-input_output_len:1000,1000-quant:fp8-tp:2]