tests: add llama 3.3 70b 2 nodes tests (#4391)

* add llama 3.3 70b 2 nodes tests

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

* remove enable_overlap_scheduler parameter

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>

---------

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
xinhe-nv 2025-05-21 12:42:45 +08:00 committed by GitHub
parent 6a35c599ef
commit 750f412b8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 34 additions and 8 deletions

View File

@ -1551,20 +1551,19 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name,
@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.skip_less_device(8)
@pytest.mark.parametrize("model_name,model_path", [
pytest.param('DeepSeek-V3', 'DeepSeek-V3', marks=skip_pre_hopper),
])
@skip_pre_hopper
@skip_post_blackwell
@pytest.mark.parametrize("model_path", ['DeepSeek-V3'])
def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus(
llm_root, llm_venv, model_name, model_path):
llm_root, llm_venv, model_path):
# "RCCA https://nvbugs/5163844"
print(f"Testing {model_name}.")
print(f"Testing {model_path}.")
example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
run_cmd = [
"trtllm-llmapi-launch",
"python3",
str(example_root / "quickstart_advanced.py"),
"--model_dir",
f"{llm_models_root()}/{model_path}",
f"--model_dir={llm_models_root()}/{model_path}",
"--moe_ep_size=8",
"--tp_size=16",
"--use_cuda_graph",
@ -2063,4 +2062,30 @@ def test_ptp_scaffolding(llm_root, llm_venv, model_name, model_path):
])
@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("model_path", [
    pytest.param('llama-3.3-models/Llama-3.3-70B-Instruct',
                 marks=skip_pre_hopper),
    pytest.param('Llama-4-Maverick-17B-128E-Instruct', marks=skip_pre_hopper),
])
def test_ptp_quickstart_advanced_llama_2nodes(llm_root, llm_venv, model_path):
    """Two-node smoke test for large Llama checkpoints via quickstart_advanced.py.

    Launches the PyTorch quickstart example through ``trtllm-llmapi-launch``
    with tp_size=16 / moe_ep_size=8 (i.e. a 2x8-GPU layout) and checks that
    the run completes without error.
    """
    print(f"Testing {model_path}.")
    # Path to the pytorch examples directory inside the repo checkout.
    example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
    launch_cmd = [
        "trtllm-llmapi-launch",
        "python3",
        str(example_root / "quickstart_advanced.py"),
        f"--model_dir={llm_models_root()}/{model_path}",
        "--moe_ep_size=8",
        "--tp_size=16",
        "--use_cuda_graph",
        f"--kv_cache_fraction={_MEM_FRACTION_50}",
        "--max_batch_size=32",
        "--max_num_tokens=2048",
        "--disable_kv_cache_reuse",
    ]
    # The launcher is invoked through the shell so the venv environment
    # (llm_venv._new_env) is inherited by the spawned multi-node job.
    full_cmd = " ".join(launch_cmd)
    check_call(full_cmd, shell=True, env=llm_venv._new_env)
# End of Pivot-To-Python examples

View File

@ -2,5 +2,6 @@ examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer]
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]
test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3]
test_e2e.py::test_ptp_quickstart_advanced_llama_2nodes[llama-3.3-models/Llama-3.3-70B-Instruct]
test_e2e.py::test_openai_multinodes_chat_tp16pp1