[TRTLLM-9932][test] add kimi_k2 single node perf test (#10436)

Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com>
Co-authored-by: Ruodi Lu <ruodil@users.noreply.github.com>
Authored by ruodil on 2026-01-09 18:36:50 +08:00, committed by GitHub
parent 4632a8642d
commit 2b72d33fdc
3 changed files with 25 additions and 9 deletions

@@ -59,7 +59,7 @@ def get_model_yaml_config(model_label: str,
     pattern_configs = [
         # Deepseek default cases
        {
-            'patterns': 'deepseek_r1',
+            'patterns': ['deepseek_r1', 'kimi_k2_nvfp4'],
             'config': {
                 'enable_attention_dp': True,
             }
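
For context, here is a minimal sketch of how a pattern entry like this is typically consumed. The names get_model_yaml_config, pattern_configs, and enable_attention_dp come from the diff; the substring matching and merge logic below are assumptions for illustration, not the repository's exact implementation.

from typing import Any

def lookup_pattern_config(model_label: str) -> dict[str, Any]:
    # Hypothetical stand-in for the repo's get_model_yaml_config helper.
    # Mirrors the diff: 'patterns' may be one string or a list of strings.
    pattern_configs: list[dict[str, Any]] = [
        {
            'patterns': ['deepseek_r1', 'kimi_k2_nvfp4'],
            'config': {'enable_attention_dp': True},
        },
    ]
    merged: dict[str, Any] = {}
    for entry in pattern_configs:
        patterns = entry['patterns']
        if isinstance(patterns, str):  # tolerate the old single-string form
            patterns = [patterns]
        # Assumed rule: a config applies when any of its patterns is a
        # substring of the model label used in the perf test name.
        if any(p in model_label for p in patterns):
            merged.update(entry['config'])
    return merged

# After this change, kimi_k2_nvfp4 labels pick up the same default as
# deepseek_r1:
#   lookup_pattern_config('kimi_k2_nvfp4-bench-pytorch-float4')
#   -> {'enable_attention_dp': True}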

@@ -144,6 +144,7 @@ MODEL_PATH_DICT = {
     "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
     "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
     "starcoder2_7b": "starcoder2-7b",
+    "kimi_k2_nvfp4": "Kimi-K2-Thinking-NVFP4",
 }
 # Model PATH of HuggingFace
 HF_MODEL_PATH = {
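
The new dictionary entry maps the perf-test label to a checkpoint directory name. A small sketch of the assumed lookup follows; LLM_MODELS_ROOT and resolve_model_path are illustrative names, not the repository's exact code.

import os

# Entries abridged from the diff; the kimi_k2 line is the addition.
MODEL_PATH_DICT = {
    "starcoder2_7b": "starcoder2-7b",
    "kimi_k2_nvfp4": "Kimi-K2-Thinking-NVFP4",
}

# Assumed convention: entries are relative to a shared model root.
LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", "/models")

def resolve_model_path(model_name: str) -> str:
    # A KeyError here would mean the test label has no registered checkpoint.
    return os.path.join(LLM_MODELS_ROOT, MODEL_PATH_DICT[model_name])

print(resolve_model_path("kimi_k2_nvfp4"))  # /models/Kimi-K2-Thinking-NVFP4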

@@ -14,10 +14,11 @@ llm_perf_core:
 # 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
 # 10: GB200, B200, B300, GB300, RTX6000-Server test cases
 # 11: B200, GB200, B300, GB300 test cases
-# 12: H100, H20, H200, B200, B300 test cases
-# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
-# 14: RTX-6000D, RTX-6000 Server test cases
-# 15: RTX6000-Server test cases
+# 12: B200, B300 test cases
+# 13: H100, H20, H200, B200, B300 test cases
+# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 15: RTX-6000D, RTX-6000 Server test cases
+# 16: RTX6000-Server test cases
 # ===============================================================================
@@ -289,7 +290,21 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:2000-ep:4-tp:4-gpus:4] TIMEOUT(120)
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:32-maxnt:32768-input_output_len:8192,1024-reqs:20-con:1-ep:1-tp:4-gpus:4] TIMEOUT(120)
-# 12: H100, H20, H200, B200, B300 test cases
+# 12: B200, B300 test cases
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+      compute_capability:
+        gte: 10.0
+        lte: 10.3
+  tests:
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:16-input_output_len:128,128-reqs:20-con:1-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:256-input_output_len:2000,500-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:1000,1000-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:5000,500-reqs:2000-ep:8-tp:8-gpus:8] TIMEOUT(120)
+# 13: H100, H20, H200, B200, B300 test cases
 - condition:
     ranges:
       system_gpu_count:
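
Each condition/ranges block gates a group of tests on system properties. A sketch of the assumed semantics, where matches_ranges is an illustrative name and gte/lte are read as inclusive bounds:

def matches_ranges(system: dict, ranges: dict) -> bool:
    # Assumed semantics: every listed property must fall inside its
    # inclusive gte/lte bounds for the test group to be scheduled.
    for key, bounds in ranges.items():
        value = system[key]
        if 'gte' in bounds and value < bounds['gte']:
            return False
        if 'lte' in bounds and value > bounds['lte']:
            return False
    return True

# The new kimi_k2 group: at least 8 GPUs with compute capability between
# 10.0 and 10.3, i.e. B200/B300-class parts per the group comment.
kimi_ranges = {
    'system_gpu_count': {'gte': 8},
    'compute_capability': {'gte': 10.0, 'lte': 10.3},
}

b200_node = {'system_gpu_count': 8, 'compute_capability': 10.0}
h200_node = {'system_gpu_count': 8, 'compute_capability': 9.0}
print(matches_ranges(b200_node, kimi_ranges))  # True  -> group runs
print(matches_ranges(h200_node, kimi_ranges))  # False -> group skipped

The bracketed test names themselves encode the benchmark parameters (roughly: maxbs = max batch size, maxnt = max token count, kv_frac = KV-cache memory fraction, con = concurrency, reqs = request count, ep/tp = expert/tensor parallel size), so the four kimi_k2 cases sweep batch size and sequence shape on a single 8-GPU node.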
@@ -356,7 +371,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
-# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -368,7 +383,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
-# 14: RTX-6000D, RTX-6000 Server test cases
+# 15: RTX-6000D, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -402,7 +417,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8-tp:2-gpus:2]
-# 15: RTX6000-Server test cases
+# 16: RTX6000-Server test cases
 - condition:
     ranges:
       system_gpu_count: