chore: remove useless allgather (#3751)

* remove useless allgather

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

* fix pre-commit issue

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

---------

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
commit b87f26ee2a (parent: 47d2f16bb8)
Author: bhsueh_NV
Date: 2025-04-22 21:26:22 +08:00 (committed by GitHub)


@@ -7,8 +7,8 @@ import torch
 import tensorrt_llm
 import tensorrt_llm.bindings as tllm
 import tensorrt_llm.bindings.executor as trtllm
-from tensorrt_llm._utils import (mpi_allgather, mpi_broadcast,
-                                 str_dtype_to_binding, torch_dtype_to_str)
+from tensorrt_llm._utils import (mpi_broadcast, str_dtype_to_binding,
+                                 torch_dtype_to_str)
 from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.logger import logger
 from tensorrt_llm.lora_manager import LoraConfig, load_torch_hf_lora
@@ -181,9 +181,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     req_ids = mpi_broadcast(req_ids, root=0)
     py_executor.start_worker()
     py_executor.await_responses(req_ids)
-    # sync all ranks after processing dummy requests
-    mpi_allgather(0)
-    # TODO check why call mpi_barrier() here will hang-on, but call mpi_allgather(0) is fine.
     torch_peak_memory = torch.cuda.memory_stats()["allocated_bytes.all.peak"]
@@ -220,9 +218,8 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     py_executor.resource_manager.resource_managers.get(
         "kv_cache_manager").shutdown()
     py_executor.shutdown()
-    # sync all ranks after creating new pyExecutor
-    mpi_allgather(0)
     if py_executor.dist.mapping.rank == 0:
         py_executor.shutdown()
     return kv_cache_max_tokens
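
Note: the deleted TODO records that mpi_barrier() hung at this point while mpi_allgather(0) did not, i.e. the allgather of a dummy token was doubling as an ad-hoc barrier: an allgather cannot return on any rank until every rank has entered the call. A minimal sketch of that pattern, written against mpi4py directly rather than the tensorrt_llm._utils wrapper (which is assumed, not confirmed, to sit on the same MPI collective):

# Sketch of a dummy-token allgather used as a cross-rank sync point.
# Assumes mpi4py; tensorrt_llm._utils.mpi_allgather is presumed to wrap
# the same collective, which is an assumption of this sketch.
from mpi4py import MPI

comm = MPI.COMM_WORLD

# allgather returns only after every rank has contributed its token, so
# reaching the next line implies all ranks have passed this point.
tokens = comm.allgather(0)
assert tokens == [0] * comm.Get_size()
print(f"rank {comm.Get_rank()} passed the sync point")

Run with, e.g., mpiexec -n 4 python sync_sketch.py (hypothetical file name). Per the commit title, these sync points were redundant at the two call sites above, so the commit simply drops them rather than replacing them with another collective.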