Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
[None][feat] add Nemotron-Ultra multi nodes eval tests (#8577)
Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
Parent: 2956978da3
Commit: 04e2b2752a
@@ -3494,6 +3494,8 @@ def test_ptp_quickstart_advanced_llama_multi_nodes(llm_root, llm_venv,
     pytest.param('DeepSeek-R1/DeepSeek-R1-0528-FP4', marks=skip_pre_blackwell),
     pytest.param('Kimi-K2-Instruct',
                  marks=(skip_pre_hopper, skip_post_blackwell)),
+    pytest.param('nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1',
+                 marks=skip_pre_hopper),
 ])
 def test_multi_nodes_eval(llm_venv, model_path, tp_size, pp_size, ep_size,
                           eval_task):
@@ -3512,6 +3514,8 @@ def test_multi_nodes_eval(llm_venv, model_path, tp_size, pp_size, ep_size,
         "--max_batch_size=32",
         eval_task,
     ]
+
+    llm_venv._new_env["TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"] = "1"
     output = check_output(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
 
     if os.environ.get("SLURM_PROCID", '0') == '0':
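For context, a minimal, self-contained sketch of the pattern the hunk above relies on: the flag is written into the environment dict that check_output forwards to the launched process, so every process started by the eval command sees it. This is not TensorRT-LLM code; the printenv command stands in for the real eval command, and the flag name is taken from the diff.

    import os
    import subprocess

    # Copy the current environment and set the same flag the test sets.
    env = os.environ.copy()
    env["TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"] = "1"

    # Stand-in command: the launched process reads the flag from its environment.
    run_cmd = ["printenv", "TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"]
    output = subprocess.check_output(" ".join(run_cmd), shell=True, env=env, text=True)
    print(output.strip())  # prints: 1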
@@ -8,5 +8,6 @@ test_e2e.py::test_multi_nodes_eval[Qwen3/Qwen3-235B-A22B-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[DeepSeek-R1/DeepSeek-R1-0528-FP4-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu]
+test_e2e.py::test_multi_nodes_eval[nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1-tp16-mmlu]
 test_e2e.py::test_openai_disagg_multi_nodes_completion[ctx_tp2pp1-gen_tp2pp1]
 test_e2e.py::test_openai_disagg_multi_nodes_completion[ctx_tp1pp2-gen_tp1pp2]
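As a usage note, the new list entry can be selected directly by its pytest node ID. A sketch, assuming pytest runs from the directory containing test_e2e.py (the working directory and extra flags are assumptions, not part of this commit):

    import pytest

    # Run only the newly added Nemotron-Ultra multi-node eval case.
    pytest.main([
        "test_e2e.py::test_multi_nodes_eval"
        "[nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1-tp16-mmlu]",
        "-v",
    ])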