Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
[None][fix] Update maxnt of llama_v3.2_1b bench (#7279)
Signed-off-by: nvamyt <amyt@nvidia.com>
Co-authored-by: Larry <197874197+LarryXFly@users.noreply.github.com>
This commit is contained in:
parent f167b1fd99
commit dbd4f21687
@@ -77,7 +77,7 @@ llm_perf_sanity:
   #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-maxbs:256-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:2]
-  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
+  - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-maxbs:1-maxnt:256-input_output_len:128,128-reqs:10-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:2]

   # FP8 tests for systems with 2+ GPUs
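For context, the hyphen-separated segments in each test name encode the benchmark configuration: the fix above adds maxnt:256 (max number of tokens) alongside maxbs:1 (max batch size) for the llama_v3.2_1b entry. A minimal sketch of how such a name could be decoded follows; the helper parse_perf_config is hypothetical and illustrates only the key:value encoding, not the actual parser in perf/test_perf.py:

    # Hypothetical helper, for illustration only: split a perf test name
    # into positional flags and key:value benchmark parameters.
    def parse_perf_config(name: str) -> dict:
        params = {"flags": []}
        for part in name.split("-"):
            if ":" in part:
                key, value = part.split(":", 1)
                params[key] = value           # e.g. "maxnt" -> "256"
            else:
                params["flags"].append(part)  # e.g. "bench", "bfloat16"
        return params

    cfg = parse_perf_config(
        "llama_v3.2_1b-bench-bfloat16-maxbs:1-maxnt:256-input_output_len:128,128-reqs:10-gpus:2"
    )
    assert cfg["maxnt"] == "256" and cfg["maxbs"] == "1"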
@@ -213,7 +213,7 @@ llm_perf_sanity:

   tests:
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:128,128]
-  - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-streaming-pytorch-float8-input_output_len:128,128]
+  - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:128,128]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:2000,500]
   - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8]
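The second hunk is a pure reordering of the streaming and pytorch segments into the pytorch-streaming order used by the surrounding entries, so the deepseek_v3_lite_fp8 name matches the parameter order of its neighbors. As a usage sketch, each entry is a pytest node ID and could in principle be run directly (the working directory and required environment are assumptions here, not stated in this commit):

    pytest "perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:128,128]"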