diff --git a/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py b/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py
index 0007b99ebd..ae25544b8b 100644
--- a/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py
+++ b/tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py
@@ -304,6 +304,7 @@ class CUDAGraphRunner:
         self.static_inputs.clear()
         self.graph_outputs.clear()
         self.graph_metadata.clear()
+        self.padding_dummy_request = None
         del self.memory_pool
         self.memory_pool = None
         torch.cuda.empty_cache()
diff --git a/tensorrt_llm/_torch/pyexecutor/resource_manager.py b/tensorrt_llm/_torch/pyexecutor/resource_manager.py
index 3395490039..6298db0146 100644
--- a/tensorrt_llm/_torch/pyexecutor/resource_manager.py
+++ b/tensorrt_llm/_torch/pyexecutor/resource_manager.py
@@ -1042,6 +1042,13 @@ class SlotManager:
         slot = self.slot_mapping.pop(request_id)
         self.free_slots.add(slot)
 
+    def shutdown(self):
+        req_ids_list = list(self.slot_mapping.keys())
+        for rid in req_ids_list:
+            self.remove_slot(rid)
+        assert len(self.slot_mapping) == 0 and len(
+            self.free_slots) == self.max_num_requests
+
 
 class ResourceManager:
diff --git a/tensorrt_llm/_torch/speculative/mtp.py b/tensorrt_llm/_torch/speculative/mtp.py
index b31512df91..aa4ff35a16 100644
--- a/tensorrt_llm/_torch/speculative/mtp.py
+++ b/tensorrt_llm/_torch/speculative/mtp.py
@@ -85,7 +85,7 @@ class MTPHiddenStatesManager(BaseResourceManager):
            self.slot_manager.add_slot(rid)
 
     def shutdown(self):
-        pass
+        self.slot_manager.shutdown()
 
     def get_max_resource_count(self) -> int:
         return self.max_num_requests
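The diff adds a real teardown path: MTPHiddenStatesManager.shutdown() now delegates to the new SlotManager.shutdown(), which drains every live slot and asserts the free pool is fully restored. Below is a minimal, self-contained sketch of that contract; it is not the TensorRT-LLM classes themselves, and the constructor details and example request ids are assumptions made only for illustration.

# Simplified sketch of the shutdown contract introduced by the diff
# (assumed constructor/fields; the real class lives in resource_manager.py).
class SlotManager:

    def __init__(self, max_num_requests: int):
        self.max_num_requests = max_num_requests
        self.slot_mapping = {}  # request_id -> slot
        self.free_slots = set(range(max_num_requests))

    def add_slot(self, request_id: int) -> int:
        slot = self.free_slots.pop()
        self.slot_mapping[request_id] = slot
        return slot

    def remove_slot(self, request_id: int) -> None:
        slot = self.slot_mapping.pop(request_id)
        self.free_slots.add(slot)

    def shutdown(self) -> None:
        # Copy the keys first: remove_slot mutates slot_mapping while we iterate.
        for rid in list(self.slot_mapping.keys()):
            self.remove_slot(rid)
        # Invariant checked by the diff: nothing is mapped and every slot is free.
        assert len(self.slot_mapping) == 0
        assert len(self.free_slots) == self.max_num_requests


manager = SlotManager(max_num_requests=4)
manager.add_slot(101)  # hypothetical request ids
manager.add_slot(102)
manager.shutdown()     # releases both slots; asserts all 4 slots are free again

Copying the key list before iterating is the important detail: remove_slot() pops entries from slot_mapping, so iterating the dict directly would raise "dictionary changed size during iteration".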