[https://nvbugs/5480289][fix] release slot manager in mtp MTPHiddenStatesManager (#7340)

Signed-off-by: Yue Weng <25103990+yweng0828@users.noreply.github.com>
This commit is contained in:
YueWeng 2025-09-03 10:37:51 +08:00 committed by GitHub
parent 4223a9aada
commit 9a4f60687f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 1 deletions

View File

@@ -304,6 +304,7 @@ class CUDAGraphRunner:
self.static_inputs.clear()
self.graph_outputs.clear()
self.graph_metadata.clear()
self.padding_dummy_request = None
del self.memory_pool
self.memory_pool = None
torch.cuda.empty_cache()

View File

@@ -1042,6 +1042,13 @@ class SlotManager:
slot = self.slot_mapping.pop(request_id)
self.free_slots.add(slot)
def shutdown(self):
    """Release every slot still held by a request.

    Iterates over a snapshot of the live request ids (``remove_slot``
    mutates ``slot_mapping`` as it goes), frees each one, and then
    sanity-checks that the manager is back in its pristine state: no
    live mappings and every slot returned to the free pool.
    """
    for request_id in list(self.slot_mapping):
        self.remove_slot(request_id)
    # After a full shutdown all slots must have been reclaimed.
    assert not self.slot_mapping
    assert len(self.free_slots) == self.max_num_requests
class ResourceManager:

View File

@@ -85,7 +85,7 @@ class MTPHiddenStatesManager(BaseResourceManager):
self.slot_manager.add_slot(rid)
def shutdown(self):
    """Delegate teardown to the underlying slot manager.

    Frees all hidden-state slots owned by this resource manager by
    shutting down ``self.slot_manager``.
    """
    self.slot_manager.shutdown()
def get_max_resource_count(self) -> int:
    """Return the maximum number of requests this manager can serve."""
    return self.max_num_requests