Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
fix[nvbug/5286515]: trtllm-llmapi-launch on single node single gpu (#4428)
* add test

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

* fix

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>

---------

Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
parent bc6a69e4cb
commit 174c5188a2
@@ -179,9 +179,6 @@ class MpiPoolSession(MpiSession):
 class MpiCommSession(MpiSession):

     def __init__(self, comm=None, n_workers: int = 1):
-        if not external_mpi_comm_available(n_workers):
-            raise RuntimeError('The LLM instance should be launched by mpirun.')
-
         self.comm = comm
         self.n_workers = n_workers
         self.thread_pool: Optional[ThreadPoolExecutor] = None
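The deleted guard above is the crux of the fix: under trtllm-llmapi-launch on a single GPU the MPI world has exactly one rank, so the availability check rejects the session before it can start. Below is a minimal sketch of that failure mode, assuming mpi4py and an approximated `external_mpi_comm_available` (the real helper lives in TensorRT-LLM and may differ):

```python
# Illustrative sketch of why the deleted guard failed on a single GPU.
# external_mpi_comm_available here is an assumed approximation of the real
# TensorRT-LLM helper, not its actual implementation.
from mpi4py import MPI  # assumption: mpi4py backs the MPI session


def external_mpi_comm_available(n_workers: int) -> bool:
    # Assumed semantics: an "external" communicator exists only when mpirun
    # started more than one rank and enough ranks to cover the workers.
    world_size = MPI.COMM_WORLD.Get_size()
    return world_size > 1 and world_size >= n_workers


# Under `trtllm-llmapi-launch` on one GPU, world_size == 1, so the old
# constructor guard raised RuntimeError for a valid single-rank session.
print(external_mpi_comm_available(n_workers=1))  # False with a single rank
```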
@@ -315,8 +315,8 @@ def test_llm_multi_node(engine_from_checkpoint: tempfile.TemporaryDirectory):


 @skip_single_gpu
-def test_llm_multi_node_pytorch():
-    nworkers = 2
+@pytest.mark.parametrize("nworkers", [1, 2])
+def test_llm_multi_node_pytorch(nworkers: int):
     test_case_file = os.path.join(os.path.dirname(__file__), "run_llm.py")
     os.path.join(os.path.dirname(__file__), "launch.py")
     command = f"mpirun --allow-run-as-root -n {nworkers} trtllm-llmapi-launch python3 {test_case_file} --model_dir {llama_model_path} --tp_size {nworkers} --use_pytorch"
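For the newly covered nworkers=1 case, the test boils down to the stand-alone invocation below; this is an illustrative reconstruction, with llama_model_path and the script location as placeholders rather than the suite's real fixtures:

```python
# Illustrative stand-alone version of the single-GPU case (nworkers=1) that
# the parametrization above adds; paths are placeholders, not test fixtures.
import subprocess

nworkers = 1
test_case_file = "run_llm.py"        # assumption: resides next to the test
llama_model_path = "/path/to/llama"  # placeholder model directory

command = (
    f"mpirun --allow-run-as-root -n {nworkers} trtllm-llmapi-launch "
    f"python3 {test_case_file} --model_dir {llama_model_path} "
    f"--tp_size {nworkers} --use_pytorch"
)
# With one rank, trtllm-llmapi-launch must now work on a single GPU.
subprocess.run(command, shell=True, check=True)
```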