mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-16 15:55:08 +08:00
[https://nvbugs/5832481][test] Add gpt-oss-120b-Eagle3-throughput case on DGX-Spark (#11419)
Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
Co-authored-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
This commit is contained in:
parent
31cdbdfd72
commit
11d79aa875
@ -320,10 +320,10 @@ def get_model_yaml_config(model_label: str,
|
||||
'num_postprocess_workers': 4
|
||||
}
|
||||
},
|
||||
# GPT-OSS 120B speculative decoding (Eagle3 draft)
|
||||
# GPT-OSS 120B speculative decoding with Eagle3
|
||||
{
|
||||
'patterns': [
|
||||
'gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1',
|
||||
'gpt_oss_120b_eagle3-bench-pytorch',
|
||||
],
|
||||
'config': {
|
||||
'enable_attention_dp': False,
|
||||
@ -337,9 +337,34 @@ def get_model_yaml_config(model_label: str,
|
||||
'decoding_type':
|
||||
'Eagle',
|
||||
'max_draft_len':
|
||||
5,
|
||||
3,
|
||||
'speculative_model_dir':
|
||||
f"{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3",
|
||||
f'{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3',
|
||||
},
|
||||
'kv_cache_config': {
|
||||
'enable_block_reuse': False,
|
||||
},
|
||||
}
|
||||
},
|
||||
# GPT-OSS 120B speculative decoding with Eagle3-throughput (https://nvbugspro.nvidia.com/bug/5832481)
|
||||
{
|
||||
'patterns': [
|
||||
'gpt_oss_120b_eagle3_throughput-bench-pytorch',
|
||||
],
|
||||
'config': {
|
||||
'enable_attention_dp': False,
|
||||
'disable_overlap_scheduler': True,
|
||||
'enable_autotuner': False,
|
||||
'cuda_graph_config': {
|
||||
'enable_padding': True,
|
||||
},
|
||||
'speculative_config': {
|
||||
'decoding_type':
|
||||
'Eagle',
|
||||
'max_draft_len':
|
||||
3,
|
||||
'speculative_model_dir':
|
||||
f'{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3-throughput',
|
||||
},
|
||||
'kv_cache_config': {
|
||||
'enable_block_reuse': False,
|
||||
|
||||
@ -173,7 +173,8 @@ MODEL_PATH_DICT = {
|
||||
"mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
|
||||
"gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
|
||||
"gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
|
||||
"gpt_oss_120b_eagle3": "gpt_oss/gpt-oss-120b-Eagle3",
|
||||
"gpt_oss_120b_eagle3": "gpt_oss/gpt-oss-120b",
|
||||
"gpt_oss_120b_eagle3_throughput": "gpt_oss/gpt-oss-120b",
|
||||
"nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev",
|
||||
"nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
|
||||
"nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4",
|
||||
|
||||
@ -10,10 +10,10 @@ llm_spark_perf:
|
||||
lte: 1
|
||||
tests:
|
||||
- perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
# GPT-OSS 120B normal case (no spec dec)
|
||||
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
# GPT-OSS 120B spec dec case (Eagle3)
|
||||
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1]
|
||||
- perf/test_perf.py::test_perf[gpt_oss_120b_eagle3-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
# GPT-OSS 120B spec dec case (Eagle3-throughput) - https://nvbugspro.nvidia.com/bug/5832481
|
||||
- perf/test_perf.py::test_perf[gpt_oss_120b_eagle3_throughput-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
- perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user