mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5480289][fix] release the slot manager in MTP's MTPHiddenStatesManager on shutdown (#7340)
Signed-off-by: Yue Weng <25103990+yweng0828@users.noreply.github.com>
This commit is contained in:
parent
4223a9aada
commit
9a4f60687f
@ -304,6 +304,7 @@ class CUDAGraphRunner:
|
||||
self.static_inputs.clear()
|
||||
self.graph_outputs.clear()
|
||||
self.graph_metadata.clear()
|
||||
self.padding_dummy_request = None
|
||||
del self.memory_pool
|
||||
self.memory_pool = None
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@ -1042,6 +1042,13 @@ class SlotManager:
|
||||
slot = self.slot_mapping.pop(request_id)
|
||||
self.free_slots.add(slot)
|
||||
|
||||
def shutdown(self):
    """Drain every slot still registered so the manager can be torn down.

    After this returns, no request-id -> slot mappings remain and every
    slot has been returned to the free pool.
    """
    # Snapshot the ids first: remove_slot mutates self.slot_mapping,
    # so we must not iterate the live dict directly.
    for request_id in tuple(self.slot_mapping):
        self.remove_slot(request_id)
    # Invariant: shutdown returned every slot to the free pool.
    assert len(self.slot_mapping) == 0 and len(
        self.free_slots) == self.max_num_requests
|
||||
|
||||
|
||||
class ResourceManager:
|
||||
|
||||
|
||||
@ -85,7 +85,7 @@ class MTPHiddenStatesManager(BaseResourceManager):
|
||||
self.slot_manager.add_slot(rid)
|
||||
|
||||
def shutdown(self):
    """Release all slots held by this manager's SlotManager.

    Delegates cleanup to the underlying slot manager so that every
    request-id registered via add_slot is removed and its slot returned
    to the free pool. (The previous body was a dead `pass` statement
    preceding the delegation call; it has been removed.)
    """
    self.slot_manager.shutdown()
|
||||
|
||||
def get_max_resource_count(self) -> int:
    """Return the maximum number of requests this manager can track."""
    limit = self.max_num_requests
    return limit
|
||||
|
||||
Loading…
Reference in New Issue
Block a user