mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
chore: remove useless allgather (#3751)
* remove useless allgather

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

* fix pre-commit issue

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

---------

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
parent 47d2f16bb8
commit b87f26ee2a
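For context on what the deleted calls were doing: mpi_allgather(0) has every rank contribute a dummy value and blocks until all contributions arrive, so it acts as a barrier that happens to move data. Below is a minimal sketch of that idiom using mpi4py as a stand-in; this is an assumption for illustration, since tensorrt_llm._utils wraps its own MPI helpers rather than exposing mpi4py directly.

# Illustration only: mpi4py stands in for the internal
# tensorrt_llm._utils MPI wrappers (an assumption, not the real API).
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Every rank contributes 0 and blocks until all ranks have done so,
# which synchronizes the job as a side effect of the data exchange.
values = comm.allgather(0)
assert values == [0] * comm.Get_size()

# The data-free equivalent. The deleted TODO in the diff notes that
# mpi_barrier() hung in practice, which is why allgather(0) had been
# used as the stand-in barrier before being removed altogether.
comm.Barrier()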
@@ -7,8 +7,8 @@ import torch
 import tensorrt_llm
 import tensorrt_llm.bindings as tllm
 import tensorrt_llm.bindings.executor as trtllm
-from tensorrt_llm._utils import (mpi_allgather, mpi_broadcast,
-                                 str_dtype_to_binding, torch_dtype_to_str)
+from tensorrt_llm._utils import (mpi_broadcast, str_dtype_to_binding,
+                                 torch_dtype_to_str)
 from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.logger import logger
 from tensorrt_llm.lora_manager import LoraConfig, load_torch_hf_lora
@@ -181,9 +181,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     req_ids = mpi_broadcast(req_ids, root=0)
     py_executor.start_worker()
     py_executor.await_responses(req_ids)
 
-    # sync all ranks after processing dummy requests
-    mpi_allgather(0)
-    # TODO check why call mpi_barrier() here will hang-on, but call mpi_allgather(0) is fine.
+
     torch_peak_memory = torch.cuda.memory_stats()["allocated_bytes.all.peak"]
 
@@ -220,9 +218,8 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     py_executor.resource_manager.resource_managers.get(
         "kv_cache_manager").shutdown()
 
-    py_executor.shutdown()
-    # sync all ranks after creating new pyExecutor
-    mpi_allgather(0)
+    if py_executor.dist.mapping.rank == 0:
+        py_executor.shutdown()
 
     return kv_cache_max_tokens
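The second hunk also swaps a collective pattern for a rank guard: instead of every rank calling shutdown() and then meeting at an allgather, only rank 0 performs the teardown. Below is a sketch of that guard under the same mpi4py assumption; ToyExecutor is a hypothetical stand-in for PyExecutor, and only the attribute path py_executor.dist.mapping.rank is taken from the diff above.

from mpi4py import MPI


class ToyExecutor:
    # Hypothetical stand-in exposing the same shutdown() surface.
    def shutdown(self):
        print(f"rank {MPI.COMM_WORLD.Get_rank()}: shutting down")


executor = ToyExecutor()

# After the change, teardown is driven from a single rank. This only
# works if shutdown() is not itself a collective call; otherwise rank 0
# would block waiting on peers that never enter it.
if MPI.COMM_WORLD.Get_rank() == 0:
    executor.shutdown()

Running the sketch with, e.g., mpirun -n 2 python sketch.py prints a single shutdown line instead of one per rank.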