From 750f412b8f51a3c378f77900a11529445b65b2a4 Mon Sep 17 00:00:00 2001 From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Date: Wed, 21 May 2025 12:42:45 +0800 Subject: [PATCH] tests: add llama 3.3 70b 2 nodes tests (#4391) * add llama 3.3 70b 2 nodes tests Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> * remove enable_overlap_scheduler parameter Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> --------- Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> --- tests/integration/defs/test_e2e.py | 39 +++++++++++++++---- .../qa/llm_multinodes_function_test.txt | 3 +- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 2abcb8a4d0..26479dc578 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1551,20 +1551,19 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name, @pytest.mark.skip_less_device_memory(80000) @pytest.mark.skip_less_device(8) -@pytest.mark.parametrize("model_name,model_path", [ - pytest.param('DeepSeek-V3', 'DeepSeek-V3', marks=skip_pre_hopper), -]) +@skip_pre_hopper +@skip_post_blackwell +@pytest.mark.parametrize("model_path", ['DeepSeek-V3']) def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus( - llm_root, llm_venv, model_name, model_path): + llm_root, llm_venv, model_path): # "RCCA https://nvbugs/5163844" - print(f"Testing {model_name}.") + print(f"Testing {model_path}.") example_root = Path(os.path.join(llm_root, "examples", "pytorch")) run_cmd = [ "trtllm-llmapi-launch", "python3", str(example_root / "quickstart_advanced.py"), - "--model_dir", - f"{llm_models_root()}/{model_path}", + f"--model_dir={llm_models_root()}/{model_path}", "--moe_ep_size=8", "--tp_size=16", "--use_cuda_graph", @@ -2063,4 +2062,30 @@ def test_ptp_scaffolding(llm_root, llm_venv, model_name, model_path): ]) +@pytest.mark.skip_less_device_memory(80000) 
+@pytest.mark.skip_less_device(8)
+@pytest.mark.parametrize("model_path", [
+    pytest.param('llama-3.3-models/Llama-3.3-70B-Instruct',
+                 marks=skip_pre_hopper),
+    pytest.param('Llama-4-Maverick-17B-128E-Instruct', marks=skip_pre_hopper),
+])
+def test_ptp_quickstart_advanced_llama_2nodes(llm_root, llm_venv, model_path):
+    print(f"Testing {model_path}.")
+    example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
+    run_cmd = [
+        "trtllm-llmapi-launch",
+        "python3",
+        str(example_root / "quickstart_advanced.py"),
+        f"--model_dir={llm_models_root()}/{model_path}",
+        "--moe_ep_size=8",
+        "--tp_size=16",
+        "--use_cuda_graph",
+        f"--kv_cache_fraction={_MEM_FRACTION_50}",
+        "--max_batch_size=32",
+        "--max_num_tokens=2048",
+        "--disable_kv_cache_reuse",
+    ]
+    check_call(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
+
+
 # End of Pivot-To-Python examples
diff --git a/tests/integration/test_lists/qa/llm_multinodes_function_test.txt b/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
index b31ab7581e..ecc64cf02f 100644
--- a/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
+++ b/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
@@ -2,5 +2,6 @@ examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp
 examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
-test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]
+test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3]
+test_e2e.py::test_ptp_quickstart_advanced_llama_2nodes[llama-3.3-models/Llama-3.3-70B-Instruct]
 test_e2e.py::test_openai_multinodes_chat_tp16pp1