diff --git a/cpp/tensorrt_llm/executor/serialization.cpp b/cpp/tensorrt_llm/executor/serialization.cpp index ad41c1f176..7f347dc6e7 100644 --- a/cpp/tensorrt_llm/executor/serialization.cpp +++ b/cpp/tensorrt_llm/executor/serialization.cpp @@ -2519,6 +2519,7 @@ size_t Serialization::serializedSize(tensorrt_llm::batch_manager::kv_cache_manag totalSize += su::serializedSize(key.uniqueTokens); // std::vector where MmKey is pair, SizeType32> totalSize += su::serializedSize(key.extraKeys); + totalSize += su::serializedSize(key.cacheSaltID); return totalSize; } @@ -2528,6 +2529,7 @@ void Serialization::serialize(tensorrt_llm::batch_manager::kv_cache_manager::Blo su::serialize(key.loraTaskId, os); su::serialize(key.uniqueTokens, os); su::serialize(key.extraKeys, os); + su::serialize(key.cacheSaltID, os); } tensorrt_llm::batch_manager::kv_cache_manager::BlockKey Serialization::deserializeBlockKey(std::istream& is) @@ -2536,11 +2538,13 @@ tensorrt_llm::batch_manager::kv_cache_manager::BlockKey Serialization::deseriali auto loraTaskId = su::deserialize>(is); auto uniqueTokens = su::deserialize>(is); auto extraKeys = su::deserialize>(is); + auto cacheSaltID = su::deserialize>(is); tensorrt_llm::batch_manager::kv_cache_manager::BlockKey key; key.usesExtraIds = usesExtraIds; key.loraTaskId = std::move(loraTaskId); key.uniqueTokens = std::move(uniqueTokens); key.extraKeys = std::move(extraKeys); + key.cacheSaltID = std::move(cacheSaltID); return key; }