Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
fix[nvbug/5286515]: trtllm-llmapi-launch on single node single gpu (#4428)
* add test

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

* fix

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

---------

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
parent bc6a69e4cb
commit 174c5188a2
@@ -179,9 +179,6 @@ class MpiPoolSession(MpiSession):
 class MpiCommSession(MpiSession):

     def __init__(self, comm=None, n_workers: int = 1):
-        if not external_mpi_comm_available(n_workers):
-            raise RuntimeError('The LLM instance should be launched by mpirun.')
-
         self.comm = comm
         self.n_workers = n_workers
         self.thread_pool: Optional[ThreadPoolExecutor] = None
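The deleted guard above is the crux of the fix: under trtllm-llmapi-launch on a single GPU the MPI world has exactly one rank, so the availability check rejects the session before it can start. Below is a minimal sketch of that failure mode, assuming mpi4py and an approximated `external_mpi_comm_available` (the real helper lives in TensorRT-LLM and may differ):

```python
# Illustrative sketch of why the deleted guard failed on a single GPU.
# external_mpi_comm_available here is an assumed approximation of the real
# TensorRT-LLM helper, not its actual implementation.
from mpi4py import MPI  # assumption: mpi4py backs the MPI session


def external_mpi_comm_available(n_workers: int) -> bool:
    # Assumed semantics: an "external" communicator exists only when mpirun
    # started more than one rank and enough ranks to cover the workers.
    world_size = MPI.COMM_WORLD.Get_size()
    return world_size > 1 and world_size >= n_workers


# Under `trtllm-llmapi-launch` on one GPU, world_size == 1, so the old
# constructor guard raised RuntimeError for a valid single-rank session.
print(external_mpi_comm_available(n_workers=1))  # False with a single rank
```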
@@ -315,8 +315,8 @@ def test_llm_multi_node(engine_from_checkpoint: tempfile.TemporaryDirectory):


 @skip_single_gpu
-def test_llm_multi_node_pytorch():
-    nworkers = 2
+@pytest.mark.parametrize("nworkers", [1, 2])
+def test_llm_multi_node_pytorch(nworkers: int):
     test_case_file = os.path.join(os.path.dirname(__file__), "run_llm.py")
     os.path.join(os.path.dirname(__file__), "launch.py")
     command = f"mpirun --allow-run-as-root -n {nworkers} trtllm-llmapi-launch python3 {test_case_file} --model_dir {llama_model_path} --tp_size {nworkers} --use_pytorch"
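For the newly covered nworkers=1 case, the test boils down to the stand-alone invocation below; this is an illustrative reconstruction, with llama_model_path and the script location as placeholders rather than the suite's real fixtures:

```python
# Illustrative stand-alone version of the single-GPU case (nworkers=1) that
# the parametrization above adds; paths are placeholders, not test fixtures.
import subprocess

nworkers = 1
test_case_file = "run_llm.py"        # assumption: resides next to the test
llama_model_path = "/path/to/llama"  # placeholder model directory

command = (
    f"mpirun --allow-run-as-root -n {nworkers} trtllm-llmapi-launch "
    f"python3 {test_case_file} --model_dir {llama_model_path} "
    f"--tp_size {nworkers} --use_pytorch"
)
# With one rank, trtllm-llmapi-launch must now work on a single GPU.
subprocess.run(command, shell=True, check=True)
```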