mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
chore: remove useless allgather (#3751)
* remove useless allgather

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

* fix pre-commit issue

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

---------

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
parent 47d2f16bb8
commit b87f26ee2a
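For context on what the deleted calls were doing: mpi_allgather(0) has every rank contribute a dummy value and blocks until all contributions arrive, so it acts as a barrier that happens to move data. Below is a minimal sketch of that idiom using mpi4py as a stand-in; this is an assumption for illustration, since tensorrt_llm._utils wraps its own MPI helpers rather than exposing mpi4py directly.

# Illustration only: mpi4py stands in for the internal
# tensorrt_llm._utils MPI wrappers (an assumption, not the real API).
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Every rank contributes 0 and blocks until all ranks have done so,
# which synchronizes the job as a side effect of the data exchange.
values = comm.allgather(0)
assert values == [0] * comm.Get_size()

# The data-free equivalent. The deleted TODO in the diff notes that
# mpi_barrier() hung in practice, which is why allgather(0) had been
# used as the stand-in barrier before being removed altogether.
comm.Barrier()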
@@ -7,8 +7,8 @@ import torch
 import tensorrt_llm
 import tensorrt_llm.bindings as tllm
 import tensorrt_llm.bindings.executor as trtllm
-from tensorrt_llm._utils import (mpi_allgather, mpi_broadcast,
-                                 str_dtype_to_binding, torch_dtype_to_str)
+from tensorrt_llm._utils import (mpi_broadcast, str_dtype_to_binding,
+                                 torch_dtype_to_str)
 from tensorrt_llm.bindings.executor import ExecutorConfig
 from tensorrt_llm.logger import logger
 from tensorrt_llm.lora_manager import LoraConfig, load_torch_hf_lora
@@ -181,9 +181,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     req_ids = mpi_broadcast(req_ids, root=0)
     py_executor.start_worker()
     py_executor.await_responses(req_ids)
 
-    # sync all ranks after processing dummy requests
-    mpi_allgather(0)
-    # TODO check why call mpi_barrier() here will hang-on, but call mpi_allgather(0) is fine.
+
     torch_peak_memory = torch.cuda.memory_stats()["allocated_bytes.all.peak"]
 
@@ -220,9 +218,8 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
     py_executor.resource_manager.resource_managers.get(
         "kv_cache_manager").shutdown()
 
-    py_executor.shutdown()
-    # sync all ranks after creating new pyExecutor
-    mpi_allgather(0)
+    if py_executor.dist.mapping.rank == 0:
+        py_executor.shutdown()
 
     return kv_cache_max_tokens
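The second hunk also swaps a collective pattern for a rank guard: instead of every rank calling shutdown() and then meeting at an allgather, only rank 0 performs the teardown. Below is a sketch of that guard under the same mpi4py assumption; ToyExecutor is a hypothetical stand-in for PyExecutor, and only the attribute path py_executor.dist.mapping.rank is taken from the diff above.

from mpi4py import MPI


class ToyExecutor:
    # Hypothetical stand-in exposing the same shutdown() surface.
    def shutdown(self):
        print(f"rank {MPI.COMM_WORLD.Get_rank()}: shutting down")


executor = ToyExecutor()

# After the change, teardown is driven from a single rank. This only
# works if shutdown() is not itself a collective call; otherwise rank 0
# would block waiting on peers that never enter it.
if MPI.COMM_WORLD.Get_rank() == 0:
    executor.shutdown()

Running the sketch with, e.g., mpirun -n 2 python sketch.py prints a single shutdown line instead of one per rank.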