fix: add warmup flag into py_executor to prevent enable profiler during warmup (#3852)

* add warmup flag into py_executor to prevent enable profiler during warmup

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

* fix bug of pre-commit

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

* change setting warmup to all ranks

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>

---------

Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
This commit is contained in:
bhsueh_NV 2025-04-27 19:22:42 +08:00 committed by GitHub
parent e2318756ed
commit 76f2c631fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 2 deletions

View File

@@ -206,6 +206,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
req = create_dummy_context_requests(max_num_tokens, seq_len, vocab_size)
req_ids = py_executor.enqueue_requests(req)
req_ids = mpi_broadcast(req_ids, root=0)
py_executor.is_warmup = True
py_executor.start_worker()
py_executor.await_responses(req_ids)
# TODO check why call mpi_barrier() here will hang-on, but call mpi_allgather(0) is fine.
@@ -250,6 +251,7 @@ def estimate_max_kv_cache_tokens(py_executor: PyExecutor,
py_executor.resource_manager.resource_managers.get(
"kv_cache_manager").shutdown()
py_executor.is_warmup = False
if py_executor.dist.mapping.rank == 0:
py_executor.shutdown()

View File

@@ -174,6 +174,7 @@ class PyExecutor:
self.profile_start_iters, self.profile_stop_iters = _load_iteration_indexes(
PROFILE_START_STOP_ENV_VAR_NAME)
self.gc_nvtx_watcher_handle = _gc_nvtx_watcher()
self.is_warmup = False # During warmup, we don't enable the profiler
# related modules
self.resource_manager = resource_manager
@@ -444,7 +445,7 @@ class PyExecutor:
def profile_step():
nonlocal it, enabled, start_time
if it in self.profile_stop_iters:
if it in self.profile_stop_iters and not self.is_warmup:
assert enabled, "Inconsistent CUDA profiling state"
if enable_torch_trace:
torch_profiler.stop()
@@ -470,7 +471,7 @@ class PyExecutor:
it += 1
if it in self.profile_start_iters:
if it in self.profile_start_iters and not self.is_warmup:
assert not enabled, "Inconsistent CUDA profiling state"
torch.cuda.cudart().cudaProfilerStart()
if enable_torch_trace: