mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[TRTLLM-9932][test] add kimi_k2 single node perf test (#10436)
Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com> Co-authored-by: Ruodi Lu <ruodil@users.noreply.github.com>
This commit is contained in:
parent
4632a8642d
commit
2b72d33fdc
@ -59,7 +59,7 @@ def get_model_yaml_config(model_label: str,
|
||||
pattern_configs = [
|
||||
# Deepseek default cases
|
||||
{
|
||||
'patterns': 'deepseek_r1',
|
||||
'patterns': ['deepseek_r1', 'kimi_k2_nvfp4'],
|
||||
'config': {
|
||||
'enable_attention_dp': True,
|
||||
}
|
||||
|
||||
@ -144,6 +144,7 @@ MODEL_PATH_DICT = {
|
||||
"gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
|
||||
"nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
|
||||
"starcoder2_7b": "starcoder2-7b",
|
||||
"kimi_k2_nvfp4": "Kimi-K2-Thinking-NVFP4",
|
||||
}
|
||||
# Model PATH of HuggingFace
|
||||
HF_MODEL_PATH = {
|
||||
|
||||
@ -14,10 +14,11 @@ llm_perf_core:
|
||||
# 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
|
||||
# 10: GB200, B200, B300, GB300, RTX6000-Server test cases
|
||||
# 11: B200, GB200, B300, GB300 test cases
|
||||
# 12: H100, H20, H200, B200, B300 test cases
|
||||
# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
|
||||
# 14: RTX-6000D, RTX-6000 Server test cases
|
||||
# 15: RTX6000-Server test cases
|
||||
# 12: B200, B300 test cases
|
||||
# 13: H100, H20, H200, B200, B300 test cases
|
||||
# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
|
||||
# 15: RTX-6000D, RTX-6000 Server test cases
|
||||
# 16: RTX6000-Server test cases
|
||||
# ===============================================================================
|
||||
|
||||
|
||||
@ -289,7 +290,21 @@ llm_perf_core:
|
||||
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:2000-ep:4-tp:4-gpus:4] TIMEOUT(120)
|
||||
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:32-maxnt:32768-input_output_len:8192,1024-reqs:20-con:1-ep:1-tp:4-gpus:4] TIMEOUT(120)
|
||||
|
||||
# 12: H100, H20, H200, B200, B300 test cases
|
||||
# 12: B200, B300 test cases
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
gte: 8
|
||||
compute_capability:
|
||||
gte: 10.0
|
||||
lte: 10.3
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:16-input_output_len:128,128-reqs:20-con:1-ep:8-tp:8-gpus:8]
|
||||
- perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:256-input_output_len:2000,500-ep:8-tp:8-gpus:8]
|
||||
- perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:1000,1000-ep:8-tp:8-gpus:8]
|
||||
- perf/test_perf.py::test_perf[kimi_k2_nvfp4-bench-pytorch-float4-maxbs:512-maxnt:2048-kv_frac:0.6-input_output_len:5000,500-reqs:2000-ep:8-tp:8-gpus:8] TIMEOUT(120)
|
||||
|
||||
# 13: H100, H20, H200, B200, B300 test cases
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
@ -356,7 +371,7 @@ llm_perf_core:
|
||||
- perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
|
||||
|
||||
|
||||
# 13: H100, H20, H200, B200, B300, RTX-6000 Server test cases
|
||||
# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
@ -368,7 +383,7 @@ llm_perf_core:
|
||||
- perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
|
||||
|
||||
|
||||
# 14: RTX-6000D, RTX-6000 Server test cases
|
||||
# 15: RTX-6000D, RTX-6000 Server test cases
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
@ -402,7 +417,7 @@ llm_perf_core:
|
||||
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8-tp:2-gpus:2]
|
||||
|
||||
|
||||
# 15: RTX6000-Server test cases
|
||||
# 16: RTX6000-Server test cases
|
||||
- condition:
|
||||
ranges:
|
||||
system_gpu_count:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user