Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
[None][feat] add Nemotron-Ultra multi nodes eval tests (#8577)
Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
Parent: 2956978da3
Commit: 04e2b2752a
@@ -3494,6 +3494,8 @@ def test_ptp_quickstart_advanced_llama_multi_nodes(llm_root, llm_venv,
     pytest.param('DeepSeek-R1/DeepSeek-R1-0528-FP4', marks=skip_pre_blackwell),
     pytest.param('Kimi-K2-Instruct',
                  marks=(skip_pre_hopper, skip_post_blackwell)),
+    pytest.param('nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1',
+                 marks=skip_pre_hopper),
 ])
 def test_multi_nodes_eval(llm_venv, model_path, tp_size, pp_size, ep_size,
                           eval_task):
@@ -3512,6 +3514,8 @@ def test_multi_nodes_eval(llm_venv, model_path, tp_size, pp_size, ep_size,
         "--max_batch_size=32",
         eval_task,
     ]
+
+    llm_venv._new_env["TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"] = "1"
     output = check_output(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
 
     if os.environ.get("SLURM_PROCID", '0') == '0':
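For context, a minimal, self-contained sketch of the pattern the hunk above relies on: the flag is written into the environment dict that check_output forwards to the launched process, so every process started by the eval command sees it. This is not TensorRT-LLM code; the printenv command stands in for the real eval command, and the flag name is taken from the diff.

    import os
    import subprocess

    # Copy the current environment and set the same flag the test sets.
    env = os.environ.copy()
    env["TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"] = "1"

    # Stand-in command: the launched process reads the flag from its environment.
    run_cmd = ["printenv", "TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"]
    output = subprocess.check_output(" ".join(run_cmd), shell=True, env=env, text=True)
    print(output.strip())  # prints: 1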
@@ -8,5 +8,6 @@ test_e2e.py::test_multi_nodes_eval[Qwen3/Qwen3-235B-A22B-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[DeepSeek-R1/DeepSeek-R1-0528-FP4-tp16-mmlu]
 test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu]
+test_e2e.py::test_multi_nodes_eval[nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1-tp16-mmlu]
 test_e2e.py::test_openai_disagg_multi_nodes_completion[ctx_tp2pp1-gen_tp2pp1]
 test_e2e.py::test_openai_disagg_multi_nodes_completion[ctx_tp1pp2-gen_tp1pp2]
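As a usage note, the new list entry can be selected directly by its pytest node ID. A sketch, assuming pytest runs from the directory containing test_e2e.py (the working directory and extra flags are assumptions, not part of this commit):

    import pytest

    # Run only the newly added Nemotron-Ultra multi-node eval case.
    pytest.main([
        "test_e2e.py::test_multi_nodes_eval"
        "[nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1-tp16-mmlu]",
        "-v",
    ])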