mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
fix: add warmup flag to py_executor to prevent enabling the profiler during warmup (#3852)
* Add a warmup flag to py_executor to prevent the profiler from being enabled during warmup. Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
* Fix a pre-commit issue. Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
* Set the warmup flag on all ranks. Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
---------
Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
This commit is contained in:
parent
e2318756ed
commit
76f2c631fb
@ -206,6 +206,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
|
||||
req = create_dummy_context_requests(max_num_tokens, seq_len, vocab_size)
|
||||
req_ids = py_executor.enqueue_requests(req)
|
||||
req_ids = mpi_broadcast(req_ids, root=0)
|
||||
py_executor.is_warmup = True
|
||||
py_executor.start_worker()
|
||||
py_executor.await_responses(req_ids)
|
||||
# TODO: investigate why calling mpi_barrier() here hangs, while calling mpi_allgather(0) works fine.
|
||||
@ -250,6 +251,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
|
||||
py_executor.resource_manager.resource_managers.get(
|
||||
"kv_cache_manager").shutdown()
|
||||
|
||||
py_executor.is_warmup = False
|
||||
if py_executor.dist.mapping.rank == 0:
|
||||
py_executor.shutdown()
|
||||
|
||||
|
||||
@ -174,6 +174,7 @@ class PyExecutor:
|
||||
self.profile_start_iters, self.profile_stop_iters = _load_iteration_indexes(
|
||||
PROFILE_START_STOP_ENV_VAR_NAME)
|
||||
self.gc_nvtx_watcher_handle = _gc_nvtx_watcher()
|
||||
self.is_warmup = False # During warmup, we don't enable the profiler
|
||||
|
||||
# related modules
|
||||
self.resource_manager = resource_manager
|
||||
@ -444,7 +445,7 @@ class PyExecutor:
|
||||
|
||||
def profile_step():
|
||||
nonlocal it, enabled, start_time
|
||||
if it in self.profile_stop_iters:
|
||||
if it in self.profile_stop_iters and not self.is_warmup:
|
||||
assert enabled, "Inconsistent CUDA profiling state"
|
||||
if enable_torch_trace:
|
||||
torch_profiler.stop()
|
||||
@ -470,7 +471,7 @@ class PyExecutor:
|
||||
|
||||
it += 1
|
||||
|
||||
if it in self.profile_start_iters:
|
||||
if it in self.profile_start_iters and not self.is_warmup:
|
||||
assert not enabled, "Inconsistent CUDA profiling state"
|
||||
torch.cuda.cudart().cudaProfilerStart()
|
||||
if enable_torch_trace:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user