[TRTLLM-9932][test] add kimi_k2 single node perf test (#10436)

Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com>
Co-authored-by: Ruodi Lu <ruodil@users.noreply.github.com>
Authored by ruodil on 2026-01-09 18:36:50 +08:00, committed by GitHub
parent 4632a8642d
commit 2b72d33fdc
3 changed files with 25 additions and 9 deletions

@@ -59,7 +59,7 @@ def get_model_yaml_config(model_label: str,
     pattern_configs = [
         # Deepseek default cases
        {
-            'patterns': 'deepseek_r1',
+            'patterns': ['deepseek_r1', 'kimi_k2_nvfp4'],
             'config': {
                 'enable_attention_dp': True,
             }
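
For context, here is a minimal sketch of how a pattern entry like this is typically consumed. The names get_model_yaml_config, pattern_configs, and enable_attention_dp come from the diff; the substring matching and merge logic below are assumptions for illustration, not the repository's exact implementation.

from typing import Any

def lookup_pattern_config(model_label: str) -> dict[str, Any]:
    # Hypothetical stand-in for the repo's get_model_yaml_config helper.
    # Mirrors the diff: 'patterns' may be one string or a list of strings.
    pattern_configs: list[dict[str, Any]] = [
        {
            'patterns': ['deepseek_r1', 'kimi_k2_nvfp4'],
            'config': {'enable_attention_dp': True},
        },
    ]
    merged: dict[str, Any] = {}
    for entry in pattern_configs:
        patterns = entry['patterns']
        if isinstance(patterns, str):  # tolerate the old single-string form
            patterns = [patterns]
        # Assumed rule: a config applies when any of its patterns is a
        # substring of the model label used in the perf test name.
        if any(p in model_label for p in patterns):
            merged.update(entry['config'])
    return merged

# After this change, kimi_k2_nvfp4 labels pick up the same default as
# deepseek_r1:
#   lookup_pattern_config('kimi_k2_nvfp4-bench-pytorch-float4')
#   -> {'enable_attention_dp': True}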

@@ -144,6 +144,7 @@ MODEL_PATH_DICT = {
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
     "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
     "starcoder2_7b": "starcoder2-7b",
+    "kimi_k2_nvfp4": "Kimi-K2-Thinking-NVFP4",
 }
 # Model PATH of HuggingFace
 HF_MODEL_PATH = {
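
The new dictionary entry maps the perf-test label to a checkpoint directory name. A small sketch of the assumed lookup follows; LLM_MODELS_ROOT and resolve_model_path are illustrative names, not the repository's exact code.

import os

# Entries abridged from the diff; the kimi_k2 line is the addition.
MODEL_PATH_DICT = {
    "starcoder2_7b": "starcoder2-7b",
    "kimi_k2_nvfp4": "Kimi-K2-Thinking-NVFP4",
}

# Assumed convention: entries are relative to a shared model root.
LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", "/models")

def resolve_model_path(model_name: str) -> str:
    # A KeyError here would mean the test label has no registered checkpoint.
    return os.path.join(LLM_MODELS_ROOT, MODEL_PATH_DICT[model_name])

print(resolve_model_path("kimi_k2_nvfp4"))  # /models/Kimi-K2-Thinking-NVFP4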

@@ -14,10 +14,11 @@ llm_perf_core:
 # 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
 # 10: GB200, B200, B300, GB300, RTX6000-Server test cases
 # 11: B200, GB200, B300, GB300 test cases
-# 12: H100, H20, H200, B200, B300 test cases
-# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
-# 14: RTX-6000D, RTX-6000 Server test cases
-# 15: RTX6000-Server test cases
+# 12: B200, B300 test cases
+# 13: H100, H20, H200, B200, B300 test cases
+# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 15: RTX-6000D, RTX-6000 Server test cases
+# 16: RTX6000-Server test cases
 # ===============================================================================
@@ -289,7 +290,21 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:2000-ep:4-tp:4-gpus:4] TIMEOUT(120)
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:32-maxnt:32768-input_output_len:8192,1024-reqs:20-con:1-ep:1-tp:4-gpus:4] TIMEOUT(120)
-# 12: H100, H20, H200, B200, B300 test cases
+# 12: B200, B300 test cases
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+      compute_capability:
+        gte: 10.0
+        lte: 10.3
+  tests:
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:16-input_output_len:128,128-reqs:20-con:1-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:256-input_output_len:2000,500-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:1000,1000-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:5000,500-reqs:2000-ep:8-tp:8-gpus:8] TIMEOUT(120)
+# 13: H100, H20, H200, B200, B300 test cases
 - condition:
     ranges:
       system_gpu_count:
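
Each condition/ranges block gates a group of tests on system properties. A sketch of the assumed semantics, where matches_ranges is an illustrative name and gte/lte are read as inclusive bounds:

def matches_ranges(system: dict, ranges: dict) -> bool:
    # Assumed semantics: every listed property must fall inside its
    # inclusive gte/lte bounds for the test group to be scheduled.
    for key, bounds in ranges.items():
        value = system[key]
        if 'gte' in bounds and value < bounds['gte']:
            return False
        if 'lte' in bounds and value > bounds['lte']:
            return False
    return True

# The new kimi_k2 group: at least 8 GPUs with compute capability between
# 10.0 and 10.3, i.e. B200/B300-class parts per the group comment.
kimi_ranges = {
    'system_gpu_count': {'gte': 8},
    'compute_capability': {'gte': 10.0, 'lte': 10.3},
}

b200_node = {'system_gpu_count': 8, 'compute_capability': 10.0}
h200_node = {'system_gpu_count': 8, 'compute_capability': 9.0}
print(matches_ranges(b200_node, kimi_ranges))  # True  -> group runs
print(matches_ranges(h200_node, kimi_ranges))  # False -> group skipped

The bracketed test names themselves encode the benchmark parameters (roughly: maxbs = max batch size, maxnt = max token count, kv_frac = KV-cache memory fraction, con = concurrency, reqs = request count, ep/tp = expert/tensor parallel size), so the four kimi_k2 cases sweep batch size and sequence shape on a single 8-GPU node.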
@@ -356,7 +371,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
-# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -368,7 +383,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
-# 14: RTX-6000D, RTX-6000 Server test cases
+# 15: RTX-6000D, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -402,7 +417,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8-tp:2-gpus:2]
-# 15: RTX6000-Server test cases
+# 16: RTX6000-Server test cases
 - condition:
     ranges:
       system_gpu_count: