mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[https://nvbugs/5740377][fix] Lock resource to fix potential access to released data (#10827)
Signed-off-by: Hui Gao <huig@nvidia.com>
This commit is contained in:
parent
d60d6ff6fd
commit
1592dfab6d
@ -368,6 +368,9 @@ private:
|
||||
std::optional<std::chrono::steady_clock::time_point::duration> mExpirationTime;
|
||||
// Hash for the event manager
|
||||
size_t mHash;
|
||||
|
||||
// Mutex for the next blocks
|
||||
mutable std::mutex mNextBlocksMutex;
|
||||
};
|
||||
|
||||
class GenerationRequest
|
||||
@ -1021,7 +1024,7 @@ private:
|
||||
std::shared_ptr<kv_connector::KvCacheConnectorManager> mKvCacheConnectorManager;
|
||||
|
||||
// Mutex for the cached blocks root
|
||||
std::mutex mCachedBlocksRootMutex;
|
||||
mutable std::mutex mCachedBlocksRootMutex;
|
||||
|
||||
// Record which sequence is using the block
|
||||
std::map<KVCacheBlock::IdType, LlmRequest::RequestIdType> mBlockToSequence;
|
||||
|
||||
@ -416,6 +416,7 @@ void KVCacheBlock::setPrevBlockInSeq(BlockPtr prevBlock)
|
||||
|
||||
void KVCacheBlock::addNextBlock(BlockKey const& blockKey, BlockPtr block)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
|
||||
if (mNextBlocks.find(blockKey) == mNextBlocks.end())
|
||||
{
|
||||
mNextBlocks[blockKey] = std::move(block);
|
||||
@ -425,6 +426,8 @@ void KVCacheBlock::addNextBlock(BlockKey const& blockKey, BlockPtr block)
|
||||
std::tuple<bool, SizeType32, BlockPtr> KVCacheBlock::findMatchingBlock(
|
||||
BlockKey const& blockKey, bool enablePartialReuse, bool copyOnPartialReuse) const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
|
||||
|
||||
if (blockKey.uniqueTokens.size() == 0 || mNextBlocks.size() == 0)
|
||||
{
|
||||
return {false, 0, nullptr};
|
||||
@ -474,11 +477,13 @@ void KVCacheBlock::freeLeafBlock()
|
||||
|
||||
// Removes the entry keyed by blockKey from mNextBlocks.
// The erase is performed while holding mNextBlocksMutex, the same mutex that
// addNextBlock, findMatchingBlock and freeDescendantsRecursively lock before
// touching mNextBlocks.
// NOTE(review): presumably a no-op when blockKey is absent (standard
// associative-container erase-by-key semantics) - confirm against the
// declared type of mNextBlocks.
void KVCacheBlock::removeNextBlock(BlockKey const& blockKey)
|
||||
{
|
||||
// Hold the lock for the whole function so the erase below cannot overlap
// with other code paths that take mNextBlocksMutex.
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
|
||||
mNextBlocks.erase(blockKey);
|
||||
}
|
||||
|
||||
void KVCacheBlock::freeDescendantsRecursively()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mNextBlocksMutex);
|
||||
bool hasChildren = !mNextBlocks.empty();
|
||||
if (hasChildren)
|
||||
{
|
||||
@ -1176,6 +1181,7 @@ std::optional<BlockKey> WindowBlockManager::findNewContextBlock(
|
||||
auto blockKeys = buildBlockKeys(blockedUniqueTokens, llmRequest);
|
||||
BlockKey ret;
|
||||
ret.loraTaskId = llmRequest.getLoraTaskId();
|
||||
std::lock_guard<std::mutex> lock(mCachedBlocksRootMutex);
|
||||
auto searchRoot = mCachedBlocksRoot;
|
||||
for (auto const& blockKey : blockKeys)
|
||||
{
|
||||
|
||||
@ -239,13 +239,10 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
|
||||
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] SKIP (https://nvbugs/5721672)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5741304)
|
||||
unittest/executor/test_rpc.py::TestRpcCorrectness::test_incremental_task_async SKIP (https://nvbugs/5741476)
|
||||
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5740377)
|
||||
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5740377)
|
||||
test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5744432)
|
||||
test_e2e.py::test_trtllm_serve_multimodal_example SKIP (https://nvbugs/5747920)
|
||||
test_e2e.py::test_trtllm_serve_example SKIP (https://nvbugs/5747938)
|
||||
triton_server/test_triton.py::test_opt[opt] SKIP (https://nvbugs/5739981)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377)
|
||||
cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941)
|
||||
examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat16] SKIP (https://nvbugs/5608979)
|
||||
examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16] SKIP (https://nvbugs/5608979)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user