Merge branch 'main' into update_mnnvl_test

Kaiyu Xie, 2026-01-12 17:16:56 +08:00, committed by GitHub
commit bfd34f19b3
156 changed files with 4659 additions and 2308 deletions


@ -129,6 +129,18 @@ static_assert(SPEC_DEC, "SPEC_Q_SEQ_LEN should only be used when SPEC_DEC is ena
#define SLIDING_WINDOW 0
#endif
#ifndef SKIP_SOFTMAX_ATTN
#define SKIP_SOFTMAX_ATTN 0
#endif
#ifndef SKIP_SOFTMAX_ATTN_BLOCK_STATS
#define SKIP_SOFTMAX_ATTN_BLOCK_STATS 0
#endif
#ifndef SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
#define SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE 1
#endif
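// SKIP_SOFTMAX_ATTN lets the gemm0 warps vote to skip the softmax + XV GEMM for a KV tile whose column
// max falls far below the running max (see computeWarpGrpColMax_sync in mha_sm90.cu).
// SKIP_SOFTMAX_ATTN_BLOCK_STATS adds skipped/total block counters for measurement.
// SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE adds an extra atomicMax pass plus a barrier so that
// thresholds > 1 are handled correctly (the reduced-sync path breaks in that case).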
// 0 - no PDL
// 1 - naive PDL
// 2 - aggressive PDL (implemented only in mha_sm90.cu for now)


@ -106,6 +106,7 @@ __device__ inline MatDesc makeMatDesc(void const* data, uint32_t dimKByteOffset,
asm volatile("trap;\n");
return 0;
}();
assert(__cvta_generic_to_shared(data) % baseAlign == 0);
uint32_t const baseOffset = ((patternAddr % baseAlign == 0) ? 0U : ((patternAddr >> 0x7) & 0x7));
return MatDesc{
/*addr=*/MatDesc::encode(__cvta_generic_to_shared(data)),


@ -2734,6 +2734,25 @@ static constexpr auto kernel_mha = kernel_mha_impl;
#endif
#ifndef GENERATE_CUBIN
uint32_t computeNbSubSeqPerSeqMHA(cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen)
{
if (!allowMultiBlockMode)
{
return 1;
}
auto const env = std::getenv("XQA_NB_SUB_SEQ");
if (env != nullptr)
{
int32_t const val = std::stoi(env);
if (val > 0)
{
return val;
}
}
return std::min<uint32_t>(
std::max<uint32_t>(1U, prop.multiProcessorCount / (batchSize * nbKHeads)), divUp(maxSeqLen, ctaTile.x));
}
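// Note: the XQA_NB_SUB_SEQ environment variable above overrides this heuristic, e.g.
//   XQA_NB_SUB_SEQ=4 <test binary>   (binary name illustrative)
// Otherwise the sub-sequence count balances SM occupancy (multiProcessorCount / (batchSize * nbKHeads))
// against the number of CTA tiles needed to cover maxSeqLen.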
void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -2771,6 +2790,13 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor, // for compatibility with mha_sm90.cu only
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, // for compatibility with mha_sm90.cu only
uint32_t* __restrict__ totalBlockCount, // for compatibility with mha_sm90.cu only
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream)
{
@ -2793,24 +2819,7 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
uint32_t const nbQHeads = nbKHeads * headGrpSize;
// const uint32_t nbSubSeqPerSeq = allowMultiBlockMode ? DBG_NB_CTAS_PER_SEQ : 1;
uint32_t const nbSubSeqPerSeq = computeNbSubSeqPerSeqMHA(prop, batchSize, nbKHeads, maxSeqLen);
// gridDim.z == batchSize && gridDim.y == nbKHeads && gridDim.x == nbSubSeqPerSeq
#if SPEC_DEC
const uint32_t nbTokenBlocksPerGrp = divUp(qSeqLen * headGrpSize, rowsPerBlock);


@ -90,6 +90,9 @@ struct BeamSearchParams
// match trt-llm API.
};
uint32_t computeNbSubSeqPerSeqMHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen);
void launchMHA(cudaDeviceProp const& prop, uint32_t const nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -127,9 +130,18 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t const nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream);
uint32_t computeNbSubSeqPerSeqHopperF8MHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen);
void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -167,6 +179,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream);


@ -49,6 +49,10 @@ static_assert(specDecQLen * headGrpSize <= 32, "SPEC_Q_SEQ_LEN macro value is to
#define SWAP_AB (!SPEC_DEC)
#endif
#if SKIP_SOFTMAX_ATTN
static_assert(SWAP_AB && USE_PAGED_KV_CACHE && !SPEC_DEC && BEAM_WIDTH == 1, "SKIP_SOFTMAX_ATTN requires SWAP_AB, a paged KV cache, no SPEC_DEC, and BEAM_WIDTH == 1.");
#endif
#define IS_SUPPORTED_F16_CASE (CACHE_ELEM_ENUM == 0 && !SPEC_DEC && SWAP_AB && !USE_INPUT_KV && !LOW_PREC_OUTPUT)
inline constexpr bool swapAB = SWAP_AB;
@ -138,26 +142,38 @@ using PaddedOutHead = PaddedInputHead;
struct alignas(128) SharedMem
{
using QBuffer = Vec<Array2D<LdGrain, ctaNbQHeads, grainsPerQPart>, nbQParts>;
using KBuffer = Array2D<LdGrain, gemm0CtaTileNbTokens, exactDiv(cacheHeadPartBytes, grainBytes)>;
using XBuffer = Vec<Array2D<LdGrain, ctaNbQHeads, grainsPerXPart>, nbXParts>;
using VBuffer = Vec<Array2D<LdGrain, gemm1CtaTileNbTokens, exactDiv(cacheHeadPartBytes, grainBytes),
sizeof(XBuffer) % (cacheHeadPartBytes * 8) == 0>,
cacheHeadNbParts>;
#if !SWAP_AB
using VTBuffer = Array2D<LdGrain, headElems, exactDiv(gemm1CtaTileNbTokens, cacheElemsPerGrain), true>;
#endif
#if CACHE_ELEM_ENUM == 0
using OutSwizzleBuf = Array2D<LdGrain, ctaNbQHeads, grainsPerPaddedInputHead>;
#elif CACHE_ELEM_ENUM == 2
using OutSwizzleBuf = Array2D<Vec<Vec<InputElem, 4>, 4>, ctaNbQHeads, exactDiv(headElems, 4 * 4)>;
#endif
#if SKIP_SOFTMAX_ATTN
static constexpr uint32_t nbKBuf = 2;
static constexpr uint32_t nbVBuf = 3; // @fixme: skip softmax attn needs one extra VBuffer
static constexpr uint32_t nbXBuf
= 3 * (gemm0CtaTileNbTokens >= gemm1CtaTileNbTokens ? 1 : exactDiv(gemm1CtaTileNbTokens, gemm0CtaTileNbTokens));
#else
static constexpr uint32_t nbKBuf = 2;
static constexpr uint32_t nbVBuf = 2;
static constexpr uint32_t nbXBuf
= 2 * (gemm0CtaTileNbTokens >= gemm1CtaTileNbTokens ? 1 : exactDiv(gemm1CtaTileNbTokens, gemm0CtaTileNbTokens));
#endif
static_assert(nbXBuf == nbVBuf);
// note: buffers used for GMMA may have additional alignment requirements
KBuffer k[nbKBuf]; // as is loaded from global mem.
QBuffer q; // For gmma math. Conversion done if needed.
union ReusedXVOutSwizzleBuf
{
struct XV
@ -196,9 +212,6 @@ struct alignas(128) SharedMem
return reusedXVOutSwizzleBuf[i].outSwizzle;
}
// @fixme: move these into reusedXVOutSwizzleBuf
#if SWAP_AB
ShmQWiseVec xColMax[nbXBuf];
@ -220,6 +233,11 @@ struct alignas(128) SharedMem
Vec<KVCachePageIndex, nbPagesPerTile> pages[2]; // one for K and one for V
#endif
#if SKIP_SOFTMAX_ATTN
uint32_t skipSoftmaxVotesGemm0ToV[nbXBuf]; // guarded by skipSoftmaxXBar
uint32_t skipSoftmaxVotesGemm0ToGemm1[nbXBuf]; // guarded by xBar
#endif
// mem barriers
CtaBarrierPair qBar;
@ -229,6 +247,9 @@ struct alignas(128) SharedMem
CtaBarrierPair vtBar[nbVBuf];
#endif
CtaBarrierPair xBar[nbXBuf];
#if SKIP_SOFTMAX_ATTN
CtaBarrierPair skipSoftmaxXBar[nbXBuf]; // for V to wait for X to be ready
#endif
// used internally in the gemm0 warp group
// @fixme: use separate arrive and wait for all usage
@ -425,8 +446,13 @@ __device__ void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
#endif
#if SWAP_AB
#if SKIP_SOFTMAX_ATTN
__device__ RegColWiseVec computeWarpGrpColMax_sync(CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src,
float skipSoftmaxThreshold, uint32_t* smemSkipVote, bool maybeSkip);
#else
__device__ RegColWiseVec computeWarpGrpColMax_sync(
CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src);
#endif
__device__ void warpGrpApplyMask(uint32_t warpRank, Gemm0Acc& acc, uint32_t validRowBeg, uint32_t validRowEnd);
__device__ void warpGrpOnlineSoftmax(Gemm0Acc& acc, RegColWiseVec const& colMax);
__device__ RegColWiseVec computeWarpColSum(Gemm0Acc& src);
@ -675,6 +701,12 @@ CUBIN_EXPORT __global__
#endif
#if SPEC_DEC
SpecDecParams const specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* __restrict__ const semaphores
= nullptr, // [nbReq][nbKHeads][divUp(specDecParams.qSeqLen, inputTokensPerCta)]
@ -753,6 +785,10 @@ CUBIN_EXPORT __global__
uint32_t const nbSubSeq = isMultiBlockMode ? mha::min(nbTilesInUse / multiBlockMinNbTilesPerCta, maxNbSubSeq) : 1;
static_assert(multiBlockMinNbTiles >= multiBlockMinNbTilesPerCta * 2);
assert(isMultiBlockMode == (nbSubSeq > 1));
#if SKIP_SOFTMAX_ATTN
bool const disableSkipForShortSeq = (cacheSeqLen < skipSoftmaxThresholdScaleFactor);
float const skipSoftmaxThreshold = disableSkipForShortSeq ? 0.0f : skipSoftmaxThresholdScaleFactor / cacheSeqLen;
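// Worked example (illustrative values, cf. the unit tests): with skipSoftmaxThresholdScaleFactor = 500,
// cacheSeqLen = 4096 gives skipSoftmaxThreshold = 500 / 4096 ~= 0.122, while cacheSeqLen = 400 (< 500)
// disables skipping for that sequence entirely.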
#endif
if (idxSubSeq >= nbSubSeq)
{
return;
@ -776,21 +812,34 @@ CUBIN_EXPORT __global__
assert(dynamicSmemSize() >= sizeof(SharedMem));
SharedMem& smem = *reinterpret_cast<SharedMem*>(&smemByteBuf[0]);
constexpr uint32_t maxNbBuffers = (SharedMem::nbXBuf > SharedMem::nbVBuf) ? SharedMem::nbXBuf : SharedMem::nbVBuf;
static_assert(
    maxNbBuffers >= SharedMem::nbKBuf && maxNbBuffers >= SharedMem::nbVBuf && maxNbBuffers >= SharedMem::nbXBuf);
if (wid < maxNbBuffers)
{
    if (warpElectSync())
    {
        if (wid < SharedMem::nbKBuf)
        {
            smem.kBar[wid].initialize(gemm0NbThrds, gemm0NbThrds + warp_size);
        }
        if (wid < SharedMem::nbXBuf)
        {
#if SKIP_SOFTMAX_ATTN
            smem.skipSoftmaxXBar[wid].initialize(gemm0NbThrds + warp_size, gemm0NbThrds + warp_size);
            smem.vBar[wid].initialize(gemm1NbThrds + warp_size, gemm1NbThrds + warp_size);
#else
            smem.vBar[wid].initialize(gemm1NbThrds, gemm1NbThrds + warp_size);
#endif
#if !SWAP_AB
            smem.vtBar[wid].initialize(gemm1NbThrds * 2, gemm1NbThrds * 2);
#endif
            smem.xBar[wid].initialize(gemm0NbThrds + gemm1NbThrds, gemm0NbThrds + gemm1NbThrds);
        }
    }
}
else if (wid == maxNbBuffers)
{
if (warpElectSync())
{
@ -819,6 +868,10 @@ CUBIN_EXPORT __global__
SpecDec const specDec{specDecParams, idxReq, idxInputSubSeq, cacheSeqLen};
#endif
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t localSkippedBlockCount = 0;
#endif
// QK gemm
constexpr uint32_t nbGmmaInstM = exactDiv(gemm0CtaTileNbTokens, gmma::instM);
using Acc = GmmaAcc<gemm0CtaTileNbTokens, ctaNbQHeads>;
@ -940,10 +993,39 @@ CUBIN_EXPORT __global__
}
}
#endif
uint32_t const idxXBuf = idxIter % SharedMem::nbXBuf;
auto& xBar = smem.xBar[idxXBuf];
// update colMax in shared mem and get a register copy
#if SWAP_AB
#if SKIP_SOFTMAX_ATTN
auto& skipSoftmaxXBar = smem.skipSoftmaxXBar[idxXBuf];
skipSoftmaxXBar.consumed.arrive_and_wait();
bool const maybeSkip = !disableSkipForShortSeq && idxIter != 0;
RegColWiseVec const colMax = computeWarpGrpColMax_sync(smem.gemm0WarpGrpBar, smem.gemm0CurrentSeqMax, acc,
skipSoftmaxThreshold, &smem.skipSoftmaxVotesGemm0ToV[idxXBuf], maybeSkip);
bool const shouldSkipSoftmaxAttn = static_cast<bool>(smem.skipSoftmaxVotesGemm0ToV[idxXBuf]);
unused(skipSoftmaxXBar.produced.arrive());
warpGrpOnlineSoftmax(acc, colMax);
if (shouldSkipSoftmaxAttn)
{
xBar.consumed.arrive_and_wait();
if (threadIdx.x == 0)
{
smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf] = 1U;
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
localSkippedBlockCount++;
#endif
}
asm volatile("fence.proxy.async.shared::cta;\n"); // maybe not used
unused(xBar.produced.arrive());
continue;
}
#else
RegColWiseVec const colMax = computeWarpGrpColMax_sync(smem.gemm0WarpGrpBar, smem.gemm0CurrentSeqMax, acc);
warpGrpOnlineSoftmax(acc, colMax);
#endif
#else
RegRowWiseVec const rowMax = computeWarpGrpRowMax_sync(warpRank, smem.gemm0CurrentSeqMax, acc);
warpGrpOnlineSoftmax(acc, rowMax);
@ -959,8 +1041,6 @@ CUBIN_EXPORT __global__
// map 1 to fp8_max before conversion to fp8
acc = acc * kE4M3_MAX;
// @fixme: for fp16/bf16, try not to transpose acc here, and leave it to the next GEMM.
#if SWAP_AB
storeGemm0AccToShm(warpRank, laneId(), smem.xBuf(idxXBuf), xBar.consumed, acc);
@ -989,13 +1069,25 @@ CUBIN_EXPORT __global__
storeShmRowWiseVec(warpRank, smem.xRowMax[idxXBuf], rowMax);
storeShmRowWiseVec(warpRank, smem.xRowSum[idxXBuf], rowSum);
#endif
#if SKIP_SOFTMAX_ATTN
if (threadIdx.x == 0)
{
smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf] = 0;
}
#endif
__syncwarp();
// the release semantics of arrive does not work for async consumers like gmma. additional fence is
// needed.
asm volatile("fence.proxy.async.shared::cta;\n");
unused(xBar.produced.arrive());
}
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
if (threadIdx.x == 0 && skippedBlockCount != nullptr && totalBlockCount != nullptr)
{
atomicAdd(skippedBlockCount, localSkippedBlockCount);
atomicAdd(totalBlockCount, nbIters);
}
#endif
unused(smem.qBar.consumed.arrive());
}
else if (warpIdx.z == 1)
@ -1043,216 +1135,231 @@ CUBIN_EXPORT __global__
uint32_t idxVTile = idxVTileInit + idxIter * nbSubSeq;
auto const idxVBuf = idxIter % SharedMem::nbVBuf;
auto const idxXBuf = idxVBuf;
auto& xBar = smem.xBar[idxXBuf];
auto& vBar = smem.vBar[idxVBuf];
auto const& vBuf = smem.vBuf(idxVBuf);
xBar.produced.arrive_and_wait();
#if SKIP_SOFTMAX_ATTN
bool shouldSkipSoftmaxAttn = smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf]; // guarded by xBar
if (shouldSkipSoftmaxAttn)
{
    vBar.produced.arrive_and_wait();
}
#endif
#if SKIP_SOFTMAX_ATTN
if (!shouldSkipSoftmaxAttn) // skip XVGemm
#endif
{
    arrive_tx_and_wait(vBar.produced, exactDiv(sizeof(SharedMem::VBuffer), gemm1NbThrds));
#if !SWAP_AB
    CtaBarrierPair& vtBar = smem.vtBar[idxVBuf];
    auto& vtBuf = smem.vtBuf(idxVBuf);
    vtBar.consumed.arrive_and_wait();
    transposeVTile(warpRank, laneId(), vtBuf, vBuf);
    vBar.consumed.arrive();
    vtBar.produced.arrive();
#endif
#if !defined(NDEBUG) && DBG_PRINT
#if SWAP_AB
    if (threadIdx.x == 0)
    {
        printf("colMax:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xColMax[idxXBuf][i]);
        }
        printf("\n");
        printf("colSum:\n");
        for (int n = 0; n < 4; n++)
        {
            for (int i = 0; i < ctaNbQHeads; i++)
            {
                printf("%f, ", smem.xColSum[idxXBuf][n][i]);
            }
            printf("\n");
        }
        printf("\n");
        printf("X:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            for (int j = 0; j < gemm0CtaTileNbTokens; j++)
            {
                auto const& elemsPerXPart = (cacheElemsPerGrain * grainsPerXPart);
                auto const e = reinterpret_cast<Vec<__nv_fp8_e4m3, 16>&>(
                    smem.xBuf(idxXBuf)[j / elemsPerXPart].template at<true>(
                        i, j % elemsPerXPart / cacheElemsPerGrain))[j % cacheElemsPerGrain];
                printf("%.2f, ", float(e));
                if (j % 16 == 15)
                {
                    printf("| ");
                }
            }
            printf("\n\n");
        }
    }
    smem.gemm1WarpGrpBar.arrive_and_wait();
#else
    if (blockIdx.y == 1 && threadIdx.x == 0)
    {
        printf("rowMax:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xRowMax[idxXBuf][i]);
        }
        printf("\n");
        printf("rowSum:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xRowSum[idxXBuf][i]);
        }
        printf("\n");
    }
    smem.gemm1WarpGrpBar.arrive_and_wait();
#endif
#endif
#if SWAP_AB
    // @fixme: if first tile, no need to rescale acc. For persistent CTA, just re-initialize acc instead.
    rescaleGemm1AccForNewColMax_sync(warpRank, smem.xColMax[idxXBuf], smem.xColSum[idxXBuf],
        smem.gemm1AccColMax, acc, smem.gemm1AccColSum, smem.gemm1WarpGrpBar);
#else
    rescaleGemm1AccForNewRowMax_sync(warpRank, smem.xRowMax[idxXBuf], smem.xRowSum[idxXBuf],
        smem.gemm1AccColMax, acc, smem.gemm1AccColSum);
#endif
    auto& xBuf = smem.xBuf(idxXBuf);
    auto const descXBase = gmma::makeMatDesc(nullptr, 0, SharedMem::XBuffer::Elem::rowBytes * 8,
        gmma::getSwizzleMode<true>(SharedMem::XBuffer::Elem{}))
                               .raw();
#if CACHE_ELEM_ENUM == 0
    auto const descVBase = gmma::makeMatDesc(nullptr, 0, SharedMem::VBuffer::Elem::rowBytes * 8,
        gmma::getSwizzleMode<true>(SharedMem::VBuffer::Elem{}))
                               .raw();
#endif
#if SWAP_AB
    //@fixme: to reduce code size, we can disable unroll and use double-buffer for LDSM in loadVTileTransposed.
#pragma unroll
    for (uint32_t idxInstK = 0; idxInstK < gemm1NbGmmaInstK; idxInstK++)
    {
#if CACHE_ELEM_ENUM == 2
        Vec<RegMatAFrag, gemm1NbGmmaInstM> const fragA
            = loadVTileTransposed(warpRank, laneId(), vBuf, idxInstK);
#if !defined(NDEBUG) && DBG_PRINT
        if (threadIdx.x == 0)
        {
            printf("fragA:\nidxInstK == %u\n", idxInstK);
        }
        smem.gemm1WarpGrpBar.arrive_and_wait();
        for (int m = 0; m < 2; m++)
        {
            for (int w = 0; w < 4; w++)
            {
                if (warpRank == w)
                {
                    if (laneId() == 0)
                    {
                        printf("  warpRank = %u\n", warpRank);
                    }
                    __syncwarp();
                    for (int a = 0; a < 2; a++)
                    {
                        for (int b = 0; b < 8; b++)
                        {
                            for (int c = 0; c < 2; c++)
                            {
                                for (int d = 0; d < 4; d++)
                                {
                                    if (laneId() == b * 4 + d)
                                    {
                                        for (int e = 0; e < 4; e++)
                                        {
                                            auto const& elem4 = reinterpret_cast<__nv_fp8_e4m3 const(&)[4]>(
                                                fragA[m](0, c)(a, 0));
                                            printf("%.2f, ", float(elem4[e]));
                                        }
                                    }
                                    __syncwarp();
                                }
                            }
                            if (laneId() == 0)
                            {
                                printf("\n");
                            }
                            __syncwarp();
                        }
                        if (laneId() == 0 && a == 0)
                        {
                            printf("----------------------\n");
                        }
                        __syncwarp();
                    }
                }
                smem.gemm1WarpGrpBar.arrive_and_wait();
            }
        }
#endif
#endif
        BoundedVal<grainsPerInstK * gemm1NbGmmaInstK> const kOffsetInGrains{grainsPerInstK * idxInstK};
        auto const descX = addAddr(descXBase,
            &xBuf[kOffsetInGrains.template divBy<SharedMem::XBuffer::Elem::cols>().get()](
                0, kOffsetInGrains.template mod<SharedMem::XBuffer::Elem::cols>().get()));
#if CACHE_ELEM_ENUM == 2
        gmma::fence();
#endif
#pragma unroll
        for (uint32_t idxInstM = 0; idxInstM < gemm1NbGmmaInstM; idxInstM++)
        {
#if CACHE_ELEM_ENUM == 0
            auto const descV
                = addAddr(descVBase, &vBuf[idxInstM](kOffsetInGrains.get() * cacheElemsPerGrain, 0));
            gmma::mma_async_shmA<MathElem, ctaNbQHeads, true, false>(
                reinterpret_cast<float(&)[exactDiv(ctaNbQHeads, gmma::instNBase)][2][2]>(acc(idxInstM, 0)),
                descV, descX, true);
#elif CACHE_ELEM_ENUM == 2
            gmma::mma_async_regA<MathElem, ctaNbQHeads>(
                reinterpret_cast<float(&)[exactDiv(ctaNbQHeads, gmma::instNBase)][2][2]>(acc(idxInstM, 0)),
                reinterpret_cast<uint32_t const(&)[2][2][1]>(fragA[idxInstM]), descX, true);
#endif
        }
        gmma::commit_group();
        //@fixme: delay wait and consumption to next tile. Note that fragA must also persist until finish of
        // gmma.
        gmma::wait_group<0>();
    }
#else
    auto const descVTBase = gmma::makeMatDesc(
        nullptr, 0, SharedMem::VTBuffer::rowBytes * 8, gmma::getSwizzleMode<true>(SharedMem::VTBuffer{}))
                                .raw();
    vtBar.produced.arrive_and_wait();
    // if (idxIter == 1 && threadIdx.x == 0) {
    //     printf("vtBuf:\n");
    //     dbg::printArray2D<__nv_fp8_e4m3, true>(vtBuf);
    // }
#pragma unroll
    for (uint32_t m = 0; m < Gemm1Acc::rows; m++)
    {
#pragma unroll
        for (uint32_t k = 0; k < gemm1NbGmmaInstK; k++)
        {
            BoundedVal<grainsPerInstK * gemm1NbGmmaInstK> const kOffsetInGrains{grainsPerInstK * k};
            auto const descX = addAddr(descXBase,
                &xBuf[kOffsetInGrains.template divBy<SharedMem::XBuffer::Elem::cols>().get()](
                    gmma::instM * m, kOffsetInGrains.template mod<SharedMem::XBuffer::Elem::cols>().get()));
            auto const descVT = addAddr(
                descVTBase, &vtBuf(0, kOffsetInGrains.template mod<SharedMem::VTBuffer::cols>().get()));
            gmma::mma_async_shmA<MathElem, headElems>(
                reinterpret_cast<float(&)[exactDiv(headElems, gmma::instNBase)][2][2]>(acc(m, 0)), descX,
                descVT, true);
        }
    }
    gmma::commit_group();
    //@fixme: delay wait and consumption to next tile. Note that fragA must also persist until finish of
    // gmma.
    gmma::wait_group<0>();
#endif
}
if (idxIter == nbIters - 1)
{
// gmma::wait_group should have already synchronized threads, so this may be unnecessary.
@ -1471,8 +1578,24 @@ CUBIN_EXPORT __global__
tensorMap
#endif
};
#if SKIP_SOFTMAX_ATTN
for (auto& b : smem.skipSoftmaxXBar)
{
unused(b.consumed.arrive());
}
#endif
for (uint32_t idxIter = 0; idxIter < nbIters; idxIter++)
{
uint32_t const idxVBuf = idxIter % SharedMem::nbVBuf;
auto& vBar = smem.vBar[idxVBuf];
#if SKIP_SOFTMAX_ATTN
uint32_t idxXBuf = idxIter % SharedMem::nbXBuf;
auto& skipSoftmaxXBar = smem.skipSoftmaxXBar[idxXBuf];
skipSoftmaxXBar.produced.arrive_and_wait();
bool shouldSkipSoftmaxAttn = smem.skipSoftmaxVotesGemm0ToV[idxXBuf];
skipSoftmaxXBar.consumed.arrive();
#endif
uint32_t const idxVTile = idxVTileInit + idxIter * nbSubSeq;
vTileLoader.loadPages(idxVTile);
#if USE_INPUT_KV || ENABLE_PDL == 2
@ -1506,8 +1629,20 @@ CUBIN_EXPORT __global__
}
#endif
uint32_t const idxVBuf = idxIter % SharedMem::nbVBuf;
auto& vBar = smem.vBar[idxVBuf];
#if SKIP_SOFTMAX_ATTN
if (shouldSkipSoftmaxAttn)
{
vBar.consumed.arrive_and_wait();
// Compared to non-skip softmax attn, we need to increase the vBar.produced count to avoid a race
// where vBar.consumed is arrived again without a wait. Without skip softmax attn, the XVGemm WG
// waits for tx_count, so its progress cannot run ahead of the vload warp. With skip softmax attn,
// the XVGemm WG may run ahead of the vload warp, because the previous vBar counts only XVGemm WG
// threads plus a tx_count (now = 0); it could then arrive vBar.consumed before it is arrive_and_wait-ed.
vBar.produced.arrive();
continue;
}
#endif
vBar.consumed.arrive_and_wait();
if (warpElectSync())
{
@ -1517,6 +1652,9 @@ CUBIN_EXPORT __global__
vTileLoader.loadData(smem.vBuf(idxVBuf)[idxPart], idxVTile, idxPart, vBar.produced);
}
}
#if SKIP_SOFTMAX_ATTN
vBar.produced.arrive();
#endif
__syncwarp();
}
}
@ -1992,9 +2130,23 @@ __device__ inline void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
#endif // SPEC_DEC
// smemColMax is persistent across multiple iterations
#if SKIP_SOFTMAX_ATTN
__device__ inline RegColWiseVec computeWarpGrpColMax_sync(CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax,
Gemm0Acc const& src, float skipSoftmaxThreshold, uint32_t* smemSkipVote, bool maybeSkip)
#else
__device__ inline RegColWiseVec computeWarpGrpColMax_sync(
CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src)
#endif
{
#if SKIP_SOFTMAX_ATTN
if (threadIdx.x == 0)
{
*smemSkipVote = maybeSkip ? 1U : 0U; // will sync before vote
}
float const lnThreshold
= log(skipSoftmaxThreshold); // this can be -inf, but should be safe as we only use it for comparison
#endif
auto colMax = RegColWiseVec::filled(Vec<float, 2>::filled(safeInitRowMax));
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
@ -2029,6 +2181,9 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
}
uint32_t const lane = laneId();
#if SKIP_SOFTMAX_ATTN
auto prevOrCurrentMax = RegColWiseVec();
#if SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
if (lane < 4)
{
#pragma unroll
@ -2037,12 +2192,43 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
#pragma unroll
for (uint32_t j = 0; j < 2; j++)
{
atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
prevOrCurrentMax[n][j] = smemColMax[8 * n + 2 * lane + j];
}
}
}
warpGrpBar.arrive_and_wait();
#endif
#endif
if (lane < 4)
{
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
{
#pragma unroll
for (uint32_t j = 0; j < 2; j++)
{
#if SKIP_SOFTMAX_ATTN && !SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
// prevOrCurrentMax <= the actual smemColMax (after updates from all 4 warps are done), but it is
// always >= smemColMax(Prev), the smemColMax value *before* this tile was computed.
// When deciding whether to skip, it is safe to use prevOrCurrentMax: 1) if all 4 warps' local max <
// smemColMax(Prev), then prevOrCurrentMax == smemColMax(Prev) and the result is unaffected; 2) if some
// local max > smemColMax(Prev), then prevOrCurrentMax > smemColMax(Prev) and some warps may incorrectly
// vote to skip, but at least one warp whose local colMax is larger will not skip, so the tile is not
// skipped. This saves some synchronization and checks, but breaks when threshold > 1.
prevOrCurrentMax[n][j] = atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
#else
atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
#endif
}
}
}
warpGrpBar.arrive_and_wait();
uint32_t const idxInQuad = lane % 4;
#if SKIP_SOFTMAX_ATTN
bool localShouldSkip = true;
#endif
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
@ -2050,10 +2236,21 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
#pragma unroll
for (uint32_t j = 0; j < GmmaAccCoreMat::cols; j++)
{
#if SKIP_SOFTMAX_ATTN
if (lane < 4 && 8 * n + 2 * idxInQuad + j < headGrpSize)
{
localShouldSkip &= (colMax[n][j] - prevOrCurrentMax[n][j]) < lnThreshold;
}
#endif
assert(colMax[n][j] <= smemColMax[8 * n + 2 * idxInQuad + j]);
colMax[n][j] = smemColMax[8 * n + 2 * idxInQuad + j];
}
}
#if SKIP_SOFTMAX_ATTN
atomicAnd(smemSkipVote, static_cast<uint32_t>(localShouldSkip)); // this will be translated to redux and voteu
#endif
warpGrpBar.arrive_and_wait();
return colMax;
}
@ -2199,7 +2396,7 @@ __device__ inline void storeGemm0AccToShm(
uint32_t const idxOctInsideHalf = idxInHalf / 8;
uint32_t const idxRowInsideOct = lane % 8;
uint32_t const warpBaseC = 16 * warpRank;
auto const toAccCoords = [](uint32_t const idxAccCoreMat) -> mha::pair<uint32_t, uint32_t>
{
uint32_t const accR = idxAccCoreMat / Gemm0Acc::cols;
uint32_t const accC = idxAccCoreMat % Gemm0Acc::cols;
@ -3231,6 +3428,24 @@ __device__ inline void storeRotatedPairsForQ(SharedMem::QBuffer& dst,
}
#ifndef GENERATE_CUBIN
uint32_t computeNbSubSeqPerSeqHopperF8MHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen)
{
auto const env = std::getenv("XQA_NB_SUB_SEQ");
if (env != nullptr)
{
int32_t const val = std::stoi(env);
if (val > 0)
{
return val;
}
}
float const factor = 0.25f;
return mha::min<uint32_t>(
mha::max<uint32_t>(1U, (uint32_t) round(prop.multiProcessorCount * 3 / (batchSize * nbKHeads) * factor)),
divUp(maxSeqLen, gemm0CtaTileNbTokens));
}
void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -3268,6 +3483,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream)
{
@ -3286,22 +3507,7 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
uint32_t const nbVHeads = nbKHeads;
uint32_t const nbQHeads = nbKHeads * headGrpSize;
uint32_t const nbQKVHeads = nbQHeads + nbKHeads + nbVHeads;
uint32_t const nbSubSeqPerSeq = computeNbSubSeqPerSeqHopperF8MHA(prop, batchSize, nbKHeads, maxSeqLen);
#if SPEC_DEC
uint32_t const qSeqLen = specDecParams.qSeqLen;
#else
@ -3371,6 +3577,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#endif
#if SPEC_DEC
specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
skippedBlockCount, totalBlockCount,
#endif
#endif
semaphores, scratch);
#else


@ -1272,6 +1272,19 @@ using is_void = is_same<remove_cv_t<T>, void>;
template <typename T>
inline constexpr bool is_void_v = is_void<T>::value;
#endif
#ifndef GENERATE_CUBIN
template <typename T1, typename T2>
using pair = std::pair<T1, T2>;
#else
template <typename T1, typename T2>
struct pair
{
T1 first;
T2 second;
};
#endif
} // namespace mha
#if GENERATE_CUBIN


@ -50,7 +50,8 @@ using Vector = Matrix<Type, Size, 1>;
template <typename MathElem, uint32_t tileSize, bool isPaged, bool useBeamSearch>
Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAttention(IOHead const* q,
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, float qScale,
float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, float skipSoftmaxThresholdScaleFactor,
uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum)
{
uint32_t const nbTiles = divUp(seqLen, tileSize);
auto gemm1Acc = Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor>::Zero().eval();
@ -61,6 +62,16 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
float const qkScale = qScale * kvScale / sqrtf(validElemsPerHead);
uint32_t const seqBeg = (seqLen < slidingWinSize ? 0 : seqLen - slidingWinSize);
uint32_t const idxTileBeg = seqBeg / tileSize;
uint32_t const nbSubSeq = (multiBlockNum > 0 && nbTiles >= 2) ? mha::min(nbTiles, multiBlockNum) : 1;
std::vector<Eigen::Vector<float, headGrpSize>> skipRowMaxs(nbSubSeq);
for (uint32_t i = 0; i < nbSubSeq; i++)
{
skipRowMaxs[i].fill(-INFINITY);
}
bool const disableSkipForShortSeq = (seqLen < skipSoftmaxThresholdScaleFactor);
float const skipSoftmaxThreshold = disableSkipForShortSeq ? 0.0f : skipSoftmaxThresholdScaleFactor / seqLen;
for (uint32_t idxTile = idxTileBeg; idxTile < nbTiles; idxTile++)
{
Eigen::Matrix<float, headGrpSize, tileSize, Eigen::RowMajor> gemm0Acc;
@ -88,7 +99,22 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
}
}
Eigen::Vector<float, headGrpSize> const localRowMax = gemm0Acc.rowwise().maxCoeff().eval();
Eigen::Vector<float, headGrpSize> const tileRowMax = localRowMax.cwiseMax(rowMax).eval();
auto const prevSkipRowMax = skipRowMaxs[idxTile % nbSubSeq];
skipRowMaxs[idxTile % nbSubSeq] = localRowMax.cwiseMax(skipRowMaxs[idxTile % nbSubSeq]).eval();
if (!disableSkipForShortSeq && skipSoftmaxThreshold > 0)
{
*totalBlockCount += 1;
auto const skipSoftmaxMask = ((localRowMax - prevSkipRowMax).array() < std::log(skipSoftmaxThreshold));
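// Never skip a sub-sequence's first tile: this mirrors the kernel, where maybeSkip requires idxIter != 0.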
bool const skipBlock = skipSoftmaxMask.all() && ((idxTile - idxTileBeg) >= nbSubSeq);
if (skipBlock)
{
*skippedBlockCount += 1;
continue;
}
}
Eigen::Matrix<float, headGrpSize, tileSize, Eigen::RowMajor> tileX
= (gemm0Acc.colwise() - tileRowMax).array().exp().eval();
@ -138,7 +164,8 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
template Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> \
refFlashAttention<prec, tileSize, isPaged, useBeamSearch>(IOHead const* q, \
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, \
float qScale, float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, \
float skipSoftmaxThreshold, uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum)
INSTANTIATE_refFlashAttention(CacheElem, 64, false, false);
INSTANTIATE_refFlashAttention(CacheElem, 64, false, true);


@ -88,7 +88,8 @@ struct CacheSeq<true, true>
template <typename MathElem, uint32_t tileSize, bool isPaged, bool useBeamSearch>
Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAttention(IOHead const* q,
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, float qScale,
float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, float skipSoftmaxThresholdScaleFactor,
uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum);
template <typename MathElem, bool isPaged, bool useBeamSearch>
#if SPEC_DEC


@ -150,7 +150,8 @@ template <uint32_t nbKHeads>
#endif
#endif
void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck, bool verbose = false,
bool saveData = false, bool hasAttentionSinks = false, uint32_t ctxLen = ~0U, uint32_t slidingWinSize = 1U << 30,
float skipSoftmaxThresholdScaleFactor = 0.0f)
{
#if IS_MLA
if (nbKHeads != 1)
@ -224,6 +225,12 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
seqLen = (16U << 20) / gmemCacheHeadBytes; // 32MB per K+V head.
}
ctxLen = std::min(ctxLen, seqLen);
uint32_t skippedBlockCount = 0;
uint32_t totalBlockCount = 0;
if (skipSoftmaxThresholdScaleFactor > 0)
{
assert(useQGMMA);
}
float const kScale = cacheElemSize == 2 ? 1.f : 1 / 4.f;
float const vScale = kScale;
float const qScale = 1.f;
@ -329,6 +336,17 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
auto const rcpOutScale = ManagedMemBuf<float>(1);
auto const seqLenList = ManagedMemBuf<uint32_t[beamWidth]>(batchSize);
auto const ctxLenList = ManagedMemBuf<uint32_t[beamWidth]>(batchSize);
#if SKIP_SOFTMAX_ATTN
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
auto const kernelSkippedBlockCount = ManagedMemBuf<uint32_t>(1);
auto const kernelTotalBlockCount = ManagedMemBuf<uint32_t>(1);
kernelSkippedBlockCount[0] = 0;
kernelTotalBlockCount[0] = 0;
#endif
#else
EXPECT_EQ(skipSoftmaxThresholdScaleFactor, 0.0f)
<< "Got non-zero skipSoftmaxThresholdScaleFactor while SKIP_SOFTMAX_ATTN is not enabled.";
#endif
#if USE_PAGED_KV_CACHE
auto const pageListBuf = ManagedMemBuf<std::byte>(pageListBytes);
#if PAGED_KV_CACHE_LAYOUT == 1
@ -726,6 +744,11 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
maxSeqLen, &seqLenList[0][0], batchSize, kvCacheScale.get(), semaphores.get(), scratch, stream);
};
#else
auto multiBlockNum = [&]()
{
auto const calcFunc = useQGMMA ? &computeNbSubSeqPerSeqHopperF8MHA : &computeNbSubSeqPerSeqMHA;
return calcFunc(prop, batchSize, nbKHeads, maxSeqLen);
}();
auto runKernel = [&]()
{
auto const launchFunc = useQGMMA ? &launchHopperF8MHA : &launchMHA;
@ -776,6 +799,12 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
batchSize, kvCacheScale.get(),
#if SPEC_DEC
specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
kernelSkippedBlockCount.get(), kernelTotalBlockCount.get(),
#endif
#endif
semaphores.get(), scratch, stream);
checkCuda(cudaGetLastError());
@ -813,6 +842,10 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
checkCuda(cudaEventRecord(toc, stream));
prefetchToDevice(cudaCpuDeviceId);
checkCuda(cudaStreamSynchronize(stream));
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
kernelSkippedBlockCount[0] /= nbIters;
kernelTotalBlockCount[0] /= nbIters;
#endif
if (testPerf)
{
float ms;
@ -849,6 +882,15 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
= totalNbCacheLoadBytes + inputBytes + outputBytes; // we ignore page indices and beam search indices.
float const dramSolTime = totalTraffic / bandwidth * 1E3f;
float const dramSolRatio = dramSolTime / ms;
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
size_t const totalNbCacheLoadWithSkip = gmemCacheHeadBytes
* (nbKHeads + nbVHeads * (1 - 1.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]))
* nbLoadedCacheTokens;
float const totalTrafficWithSkip
= totalNbCacheLoadWithSkip + inputBytes + outputBytes; // we ignore page indices and beam search indices.
float const dramSolTimeWithSkip = totalTrafficWithSkip / bandwidth * 1E3f;
float const dramSolRatioWithSkip = dramSolTimeWithSkip / ms;
#endif
if (verbose)
{
printf("done\n");
@ -863,7 +905,13 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
}
float const tops = headGrpSize * qSeqLen * float(seqLen) * (validElemsPerKHead + validElemsPerVHead) * 2
* nbKHeads * batchSize / (ms * 1E-3F) * 1E-12F;
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
printf("kernel skippedBlockCount: %d/%d (%.2f%%)\n", kernelSkippedBlockCount[0], kernelTotalBlockCount[0],
kernelTotalBlockCount[0] == 0 ? 0.0f : 100.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]);
printf("dramSolRatioWithSkip: %f%% (%f ms, TOPS = %f)\n", dramSolRatioWithSkip * 100, ms, tops);
#else
printf("dramSolRatio: %f%% (%f ms, TOPS = %f)\n", dramSolRatio * 100, ms, tops);
#endif
}
if (refCheck)
{
@ -1084,8 +1132,8 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
if (useQGMMA)
{
refOutput = refFlashAttention<CacheElem, 64>(&qHeads[req][b][headGrpSize * idxKHead], kCacheSeq,
vCacheSeq, seqLen, qScaleForRef, kvCacheScale[0], xScale, slidingWinSize, refAttentionSinks,
skipSoftmaxThresholdScaleFactor, &skippedBlockCount, &totalBlockCount, multiBlockNum);
// refOutput = refAttention<CacheElem>(&qHeads[req][b][headGrpSize * idxKHead], kCacheSeq,
// vCacheSeq, seqLen, qScaleForRef, kvCacheScale[0], xScale, slidingWinSize);
}
@ -1132,6 +1180,14 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
#endif
}
}
#if SKIP_SOFTMAX_ATTN
printf("host skippedBlockCount: %d/%d (%.2f%%)\n", skippedBlockCount, totalBlockCount,
totalBlockCount == 0 ? 0.0f : 100.0f * skippedBlockCount / totalBlockCount);
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
printf("kernel skippedBlockCount: %d/%d (%.2f%%)\n", kernelSkippedBlockCount[0], kernelTotalBlockCount[0],
kernelTotalBlockCount[0] == 0 ? 0.0f : 100.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]);
#endif
#endif
if (saveData)
{
fout_refOutput.close();
@ -1253,6 +1309,14 @@ TEST(RefCheck, llama_V2_70b)
#if SLIDING_WINDOW
runTest<2>(2, 4096, false, true, false, false, false, ~0, 256);
runTest<2>(2, 400, false, true, false, false, false, ~0U, 256);
#endif
#if SKIP_SOFTMAX_ATTN
runTest<1>(32, 2048, false, true, false, false, false, ~0U, 1U << 30, 0.f);
runTest<4>(32, 1538, false, true, false, false, false, ~0U, 1U << 30, 1280.f);
runTest<2>(32, 4096, false, true, false, false, false, ~0U, 1U << 30, 125.f);
runTest<4>(32, 300, false, true, false, false, false, ~0U, 1U << 30, 80.f);
runTest<4>(32, 500, false, true, false, false, false, ~0U, 1U << 30, 501.0f);
runTest<4>(32, 500, false, true, false, false, false, ~0U, 1U << 30, 500.f);
#endif
runTest<8>(120, 367, false, true);
runTest<8>(1792, 2048, false, true);


@ -298,6 +298,11 @@ bool AttentionOp::convertMMHAParamsToXQAParams(tensorrt_llm::kernels::XQAParams&
xqaParams.use_sparse_attention = useTllmGenSparseAttention();
// Skip softmax threshold.
xqaParams.skip_softmax_threshold_scale_factor = mSkipSoftmaxThresholdScaleFactorDecode;
#ifdef SKIP_SOFTMAX_STAT
// Statistics of skip-softmax, pointers of device memory for output
xqaParams.skip_softmax_total_blocks = mSkipSoftmaxTotalBlocks;
xqaParams.skip_softmax_skipped_blocks = mSkipSoftmaxSkippedBlocks;
#endif
// Cross attention parameters.
xqaParams.encoder_input_lengths = generationsParams.encoder_input_lengths;


@ -105,7 +105,8 @@ CubinObj CompileEngine::compile() const
// scratch in this case.
/*use_input_kv=*/applyRoPEInXqaKernel,
/*rope_style=*/ropeStyle,
/*is_spec_dec_tree=*/mXqaParams.is_spec_dec_tree,
/*use_skip_softmax_attn=*/mXqaParams.skip_softmax_threshold_scale_factor != 0};
if (context.kernel_type == TLLM_XQA_JIT_MLA)
{
auto const& c = context;


@ -232,6 +232,7 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
jit::CubinObj const* const cubinObj = mResource->getCubinObjRegistry()->getCubin(key);
TLLM_CHECK(cubinObj != nullptr && cubinObj->isInitialized());
bool const isSpecDec = xqaParams.multi_query_tokens;
bool const isSkipSoftmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
bool const isHMMAKernel = (cubinObj->getKernelType() == XQAKernelType::kAMPERE_WARP_SPECIALIZED);
bool const isGMMAKernel = (cubinObj->getKernelType() == XQAKernelType::kHOPPER_WARP_SPECIALIZED);
bool const isMLAKernel = (cubinObj->getKernelType() == XQAKernelType::kSM120_MLA);
@ -378,7 +379,7 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
.mask = reinterpret_cast<SpecDecParams::MaskType const*>(xqaParams.spec_decoding_packed_mask)};
};
constexpr uint32_t kMAX_NB_KERNEL_PARAMS = 19;
uint32_t idxNextParam = 0;
void* kernelParams[kMAX_NB_KERNEL_PARAMS];
auto appendParam = [&](auto* p) mutable
@ -514,6 +515,16 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
appendParam(&specDecParams);
specDecBlocks = divUp(specDecParams.qSeqLen, 64 / num_q_heads_over_kv);
}
if (isSkipSoftmax)
{
TLLM_CHECK_WITH_INFO(isGMMAKernel, "skip softmax is only supported for GMMA kernel for now.");
TLLM_CHECK_WITH_INFO(!isSpecDec, "skip softmax is not supported with spec dec for now.");
appendParam(&xqaParams.skip_softmax_threshold_scale_factor);
#ifdef SKIP_SOFTMAX_STAT
appendParam(&xqaParams.skip_softmax_total_blocks);
appendParam(&xqaParams.skip_softmax_skipped_blocks);
#endif
}
appendParam(&launchParams.semaphores);
appendParam(&launchParams.scratch);
kernelParams[idxNextParam] = nullptr; // one extra nullptr at end as guard.


@ -96,10 +96,16 @@ bool supportConfigQGMMA(XQAParams const& xqaParams, int SM, bool forConfigurePlu
{
return false;
}
if (!contains({DATA_TYPE_FP16, DATA_TYPE_BF16, DATA_TYPE_E4M3}, xqaParams.kv_cache_data_type))
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (!is_skip_softmax && xqaParams.kv_cache_data_type != DATA_TYPE_E4M3)
{
// fp16/bf16 KV cache is supported by the Hopper kernel only when skip softmax is enabled.
return false;
}
if (xqaParams.beam_width != 1)
{
return false;
@ -168,6 +174,11 @@ bool supportConfigHMMA(XQAParams const& xqaParams, int SM, bool forConfigurePlug
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (is_skip_softmax)
{
return false;
}
return true;
}
@ -201,6 +212,11 @@ bool supportConfigMLA(XQAParams const& xqaParams, int SM, bool forConfigurePlugi
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (is_skip_softmax)
{
return false;
}
return true;
}


@ -66,6 +66,7 @@ extern "C"
bool is_spec_dec_tree
= true; // useful only when multi_query_tokens, should be true unless using linear tree in spec-dec.
bool use_skip_softmax_attn;
} tllmXqaJitContext;
// tllmXqaJitProgram is an opaque handle for a program.


@ -215,6 +215,10 @@ tllmXqaJitStatus getMacroFlags(tllmXqaJitContext const* context, std::vector<std
macros["USE_INPUT_KV"] = context->use_input_kv ? "1" : "0";
macros["ROPE_STYLE"] = std::to_string(int(context->rope_style));
macros["IS_SPEC_DEC_TREE"] = context->is_spec_dec_tree ? "1" : "0";
macros["SKIP_SOFTMAX_ATTN"] = context->use_skip_softmax_attn ? "1" : "0";
#ifdef SKIP_SOFTMAX_STAT
macros["SKIP_SOFTMAX_ATTN_BLOCK_STATS"] = context->use_skip_softmax_attn ? "1" : "0";
#endif
// Without these macros, NVRTC uses precompiled headers for cuda_fp16.h etc.
// Linking might fail due to ABI incompatibility.


@ -493,6 +493,10 @@ bool DecoderXQAImplPrecompiled::shouldUse(XQAParams const& xqaParams, bool forCo
{
SUPPORT_RETURN_FALSE("streaming-llm");
}
if (xqaParams.skip_softmax_threshold_scale_factor != 0)
{
SUPPORT_RETURN_FALSE("skip_softmax_threshold_scale_factor");
}
// OPTIMIZE: For the standard generation-phase MHA, there are still extra limitations.
// NOTE: Medusa mode = Multi_query_tokens > 1.


@ -64,6 +64,21 @@ CUtensorMapSwizzle getSwizzleMode(uint32_t partBytes)
}
};
CUtensorMapDataType_enum getDataTypeFromXqaParams(XQAParams const& xqaParams)
{
if (xqaParams.kv_cache_data_type == DATA_TYPE_BF16)
{
return CU_TENSOR_MAP_DATA_TYPE_BFLOAT16;
}
else if (xqaParams.kv_cache_data_type == DATA_TYPE_FP16)
{
return CU_TENSOR_MAP_DATA_TYPE_FLOAT16;
}
TLLM_CHECK(xqaParams.kv_cache_data_type == DATA_TYPE_E4M3 || xqaParams.kv_cache_data_type == DATA_TYPE_E5M2
|| xqaParams.kv_cache_data_type == DATA_TYPE_INT8);
return CU_TENSOR_MAP_DATA_TYPE_UINT8;
}
CUtensorMap makeTensorMapForQ(std::shared_ptr<CUDADriverWrapper> const& driver, void const* addr,
CUtensorMapDataType_enum dataType, uint32_t headElems, uint32_t totalNbHeads, uint32_t partElems, uint32_t boxHeads)
{
@ -131,24 +146,26 @@ CUtensorMap makeTensorMapForHopperXqaKVCache(
if constexpr (std::is_same_v<KVCacheBuffer, KVBlockArray>)
{
uint32_t const headElems = xqaParams.head_size;
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const elemBytes = getElemBytes(dataType);
TLLM_CHECK(headElems <= 256);
uint32_t const paddedHeadElems = headElems <= 64 ? 64 : (headElems <= 128 ? 128 : 256);
uint32_t const partElems = std::min(elemBytes * paddedHeadElems, 128U) / elemBytes;
return makeTensorMapForPagedKVCache(driver, kv_cache_buffer.mPrimaryPoolPtr, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.tokens_per_block, partElems);
}
else
{
static_assert(std::is_same_v<KVCacheBuffer, KVLinearBuffer>);
uint32_t const headElems = xqaParams.head_size;
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const elemBytes = getElemBytes(dataType);
TLLM_CHECK(headElems <= 256);
uint32_t const paddedHeadElems = headElems <= 64 ? 64 : (headElems <= 128 ? 128 : 256);
uint32_t const partElems = std::min(elemBytes * paddedHeadElems, 128U) / elemBytes;
return makeTensorMapForContiguousKVCache(driver, kv_cache_buffer.data, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.max_attention_window_size, xqaParams.beam_width, xqaParams.batch_size,
partElems);
}
}
@ -161,11 +178,12 @@ template <typename KVCacheBuffer>
CUtensorMap makeTensorMapForXqaMlaKVCache(std::shared_ptr<tensorrt_llm::common::CUDADriverWrapper> const& driver,
XQAParams const& xqaParams, KVCacheBuffer const& kv_cache_buffer, bool forK)
{
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const partElems = (forK ? 64 : 128);
if constexpr (std::is_same_v<KVCacheBuffer, KVBlockArray>)
{
return makeTensorMapForPagedKVCache(driver, kv_cache_buffer.mPrimaryPoolPtr, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.tokens_per_block, partElems);
}
else
{
@ -183,7 +201,7 @@ CUtensorMap makeTensorMapForXqaMlaQ(
std::shared_ptr<tensorrt_llm::common::CUDADriverWrapper> const& driver, XQAParams const& xqaParams, void const* q)
{
uint32_t const partElems = 64;
return makeTensorMapForQ(driver, q, getDataTypeFromXqaParams(xqaParams), xqaParams.head_size,
xqaParams.num_q_heads * xqaParams.total_num_input_tokens, partElems, xqaParams.num_q_heads);
}
} // namespace kernels


@ -119,7 +119,12 @@ struct XQAParams
bool use_sparse_attention = false;
// Skip softmax threshold.
float skip_softmax_threshold_scale_factor = 0.0f;
#ifdef SKIP_SOFTMAX_STAT
uint32_t* skip_softmax_total_blocks = nullptr;
uint32_t* skip_softmax_skipped_blocks = nullptr;
#endif
cudaStream_t stream = 0;
// layer index
@ -199,6 +204,10 @@ struct XQAParams
<< "sparse_params: " << sparse_params.toString() << std::endl
<< "use_sparse_attention :" << (use_sparse_attention ? "true" : "false") << std ::endl
<< "skip_softmax_threshold_scale_factor :" << skip_softmax_threshold_scale_factor << std ::endl
#ifdef SKIP_SOFTMAX_STAT
<< "skip_softmax_total_blocks :" << skip_softmax_total_blocks << std ::endl
<< "skip_softmax_skipped_blocks :" << skip_softmax_skipped_blocks << std ::endl
#endif
<< "stream :" << stream;
return ss.str();


@ -0,0 +1,423 @@
# Optimizing DeepSeek-V3.2 on NVIDIA Blackwell GPUs
By NVIDIA TensorRT LLM team
## Table of Contents
- [Optimizing DeepSeek-V3.2 on NVIDIA Blackwell GPUs](#optimizing-deepseek-v32-on-nvidia-blackwell-gpus)
- [Table of Contents](#table-of-contents)
- [Introduction](#introduction)
- [DeepSeek Sparse Attention (DSA)](#deepseek-sparse-attention-dsa)
- [Precision Strategy](#precision-strategy)
- [Parallel Strategy](#parallel-strategy)
- [Key Features](#key-features)
- [MTP](#mtp)
- [Disaggregated Serving](#disaggregated-serving)
- [Chunked Prefill and KV Cache Reuse](#chunked-prefill-and-kv-cache-reuse)
- [Wide Expert Parallelism (Wide-EP)](#wide-expert-parallelism-wide-ep)
- [Chat Template and Tool Parser](#chat-template-and-tool-parser)
- [Key Optimizations](#key-optimizations)
- [Kernel Optimizations](#kernel-optimizations)
- [Sparse MLA Kernel](#sparse-mla-kernel)
- [Indexer Top-K Kernel](#indexer-top-k-kernel)
- [DeepGEMM MQA Kernel](#deepgemm-mqa-kernel)
- [Kernel Fusion](#kernel-fusion)
- [System Optimizations](#system-optimizations)
- [Multi-streams](#multi-steams)
- [A Fast Path for Short Sequences](#a-fast-path-for-short-sequences)
- [How to Reproduce](#how-to-reproduce)
- [Accuracy Evaluation](#accuracy-evaluation)
- [Benchmark on B200](#benchmark-on-b200)
- [Min-latency](#min-latency)
- [Max-throughput](#max-throughput)
- [Benchmark with Wide-EP on GB200](#benchmark-with-wide-ep-on-gb200)
- [Future Works](#future-works)
- [Acknowledgement](#acknowledgement)
## Introduction
The open-source [DeepSeek-V3.2](https://api-docs.deepseek.com/news/news251201) series models propose a new architecture with a fine-grained sparse attention mechanism called DeepSeek Sparse Attention (DSA). It helps the DeepSeek-V3.2 model achieve better efficiency, especially in long-sequence scenarios. Although DSA uses a lightweight indexer for prediction, realizing an actual speedup from attention sparsity is still challenging. This blog introduces how TensorRT LLM supports key LLM inference features for DeepSeek-V3.2 and optimizes its performance on NVIDIA Blackwell GPUs.
## DeepSeek Sparse Attention (DSA)
DSA serves as a core component of the DeepSeek-V3.2 model, and it is the only architectural modification compared to its predecessors (DeepSeek-V3/R1/V3.1). It is a fine-grained sparse attention mechanism that selects only the important key-value entries for attention computation.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_dsa_architecture.png" alt="tech_blog15_dsa_architecture" width="700" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 1. The architecture of DSA. The green part illustrates how DSA selects the Top-K key-value entries according to the indexer.</em></sub></p>
Figure 1 illustrates the overall architecture: a lightning indexer first determines the importance of all key-value entries for each query token. Subsequently, the Top-K Selector retains only the top-$k$ entries (typically $k=2048$) based on the index scores. Finally, attention is computed exclusively between the query token and these selected entries.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_indexer_topk.png" alt="tech_blog15_indexer_topk" width="900" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 2. The architecture of the DSA indexer and Top-K logics.</em></sub></p>
Figure 2 illustrates the DSA indexer and the Top-K selection mechanism. First, two low-rank linear layers project $c_t^Q$ and the input $h_t$ into lower-dimensional tensors. After applying LayerNorm to the K tensor and RoPE to both Q and K, we obtain the tensors $Q_t^I$ and $K_t^I$. Simultaneously, a separate weight projection layer processes $h_t$ to generate the weights $W_t^I$. These tensors are then used to compute the index scores (labeled as MQA Logits in Figure 2):
$$I_{t} = \sum_{j=1}^{h}W_j^I \cdot \text{ReLU}(Q_{t, j}^I (K_t^I)^T)$$
Finally, a Top-K operation is applied to the index scores to identify the most relevant indices, which are subsequently used for the sparse MLA computation. To reduce computational overhead, the K tensor $K_t^I$ is stored in the indexer K cache, allowing for reuse in subsequent iterations.
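For intuition, the scoring and selection steps above can be sketched in a few lines of NumPy. Names, shapes, and the dense cache layout below are illustrative only, not the TensorRT LLM implementation:

```python
import numpy as np

def dsa_select_topk(q_idx, k_idx, w_idx, k=2048):
    """Toy version of the DSA indexer: score all cached keys for one
    query token, then keep the indices of the top-k scores.

    q_idx: [h, d]  per-head indexer queries Q_t^I
    k_idx: [n, d]  cached indexer keys K^I (n past tokens)
    w_idx: [h]     per-head weights W_t^I
    """
    # MQA logits: ReLU(Q K^T), then a weighted sum over the h indexer heads.
    logits = np.maximum(q_idx @ k_idx.T, 0.0)        # [h, n]
    scores = (w_idx[:, None] * logits).sum(axis=0)   # [n]
    # Indices of the top-k scores; only these KV rows enter sparse MLA.
    k = min(k, scores.shape[0])
    return np.argpartition(scores, -k)[-k:]

# Example: 4 indexer heads, dim 16, 4096 cached tokens.
rng = np.random.default_rng(0)
topk = dsa_select_topk(rng.normal(size=(4, 16)),
                       rng.normal(size=(4096, 16)),
                       rng.normal(size=4))
print(topk.shape)  # (2048,)
```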
Regarding implementation, DSA diverges from the MLA used in DeepSeek-V3/R1/V3.1 models, which alternates between MHA mode (prefill) and MQA mode (decoding) as discussed in [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md). Instead, our current DSA implementation operates only in MQA mode for both prefill and decoding phases to maximize kernel efficiency. We are continuing to explore further optimizations, including potential support for MHA mode in future iterations.
The DSA implementation is built upon the TensorRT LLM sparse attention framework, which is designed to provide flexible and extensible support for various sparse attention methods. For more information, please refer to the [sparse attention documentation](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/features/sparse-attention.md), and a technical blog providing further details will be released soon.
## Precision Strategy
Because DSA is the only architectural change in DeepSeek-V3.2 relative to DeepSeek-R1, the mixed-precision recipe for the other modules is the same as that used for DeepSeek-R1. The NVFP4 precision strategy used in the DSA module is:
- Indexer
- Low-rank linear layers: BF16
- Weight projection layer: FP32, for model accuracy
- MQA:
- Indexer K cache: Blockwise FP8
- Math: Blockwise FP8
- Top-K: FP32
- QKV projection layer: BF16
- Output projection layer: NVFP4
- Sparse MLA
- KV cache: Per-tensor FP8
- Math: Per-tensor FP8
The MoE layers use NVFP4, the same as in DeepSeek-R1. Please refer to [Tech Blog 1](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md) and [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md) for the MoE precision strategy. In addition to the NVFP4 version of DeepSeek-V3.2, TensorRT-LLM also supports the original FP8 model, as well as both BF16 and per-tensor FP8 KV caches.
We evaluated the accuracy of this NVFP4 checkpoint on the following datasets:
| | GSM8k | MMLU | GPQA-Diamond |
| :----------------- | :---- | :---- | :----------- |
| [deepseek-ai/DeepSeek-V3.2](https://huggingface.co/deepseek-ai/DeepSeek-V3.2) | 95.91 | 87.84 | 84.34 |
| nvidia/DeepSeek-V3.2-NVFP4<sup>*</sup> | 95.26 | 87.54 | 84.85 |
<sub><em>\* Currently, the NVFP4 checkpoint has not yet been published on Hugging Face. Please stay tuned, or refer to the [How to reproduce](#how-to-reproduce) section to learn how to quantize the model to NVFP4.
** Note that there is some run-to-run variance in these evaluations. Our experiments indicate that the NVFP4 recipe delivers accuracy on par with FP8 on these datasets.</em></sub>
## Parallel Strategy
To achieve optimal throughput, DeepSeek-V3.2 adopts the same parallel strategy as DeepSeek-R1. Please refer to [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md) for a detailed explanation of the performance benefits:
| Components | Parallel Patterns |
| :----------------- | :-------------------------- |
| Attention Modules | Data Parallelism 8 (DP8) |
| MoE Sparse Experts | Expert Parallelism 8 (EP8) |
| MoE Shared Experts | DP8 |
| Router GEMM | DP8 |
To scale DeepSeek-V3.2 inference on high-performance systems such as the GB200 NVL72, the model also leverages the parallel strategy from DeepSeek-R1. Please refer to [Tech Blog 4](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md), [Tech Blog 8](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md), and [Tech Blog 14](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.md) for more details.
The difference lies in the DSA indexer. When utilizing Tensor Parallelism (TP) for attention modules, typically in latency-oriented scenarios, TP is not applied to the indexer layers. Instead, it is applied exclusively to the MLA components (i.e., the remaining layers of the attention module).
## Key Features
In TensorRT LLM, there are many advanced features that are crucial for maximizing LLM inference performance, such as CUDA Graph, Overlap Scheduler, Speculative Decoding, etc. Given the architectural innovations in DeepSeek-V3.2, ensuring its compatibility with these features is important.
As illustrated in [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md), both CUDA Graph and the Overlap Scheduler offer significant throughput improvements. For CUDA Graph support, which is typically enabled during decoding-only iterations where all requests are in the decoding phase, we must ensure that kernels in the DSA module support graph capture and that input/output tensor shapes remain consistent for a given batch size. Regarding the Overlap Scheduler, it is critical to eliminate any CPU-GPU synchronization within the DSA forward, as this would disrupt the execution pipeline. Other key features are discussed in the following subsections.
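As a concrete illustration of the static-shape constraint, here is a minimal PyTorch CUDA Graph sketch (a generic example, unrelated to the actual DSA kernels): a captured region can only be replayed if every tensor keeps the same shape and address, which is why decode batches are padded to fixed sizes.

```python
import torch

# Hypothetical decode-step work; shapes must be identical on every replay.
x = torch.randn(16, 1024, device="cuda")   # fixed batch size 16
w = torch.randn(1024, 1024, device="cuda")
out = torch.empty(16, 1024, device="cuda")

# Warm up on a side stream before capture (required by PyTorch).
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    out.copy_(x @ w)
torch.cuda.current_stream().wait_stream(s)

g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    out.copy_(x @ w)                        # captured kernels

# New inputs are written into the *same* buffers, then the graph replays.
x.copy_(torch.randn(16, 1024, device="cuda"))
g.replay()
```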
### MTP
Multi-Token Prediction (MTP) is a speculative decoding method used in DeepSeek series models. It verifies and accepts multiple draft tokens in a single iteration, significantly improving inference performance in both low-latency and high-throughput scenarios. DeepSeek-V3.2 also supports MTP. For latency-critical scenarios, as detailed in [Tech Blog 1](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md), MTP-3 is recommended to maximize GPU utilization and achieve optimal performance. For other scenarios, MTP-1 typically offers performance gains as well.
However, the decoding indexer MQA kernel supports sequence lengths of only 1 or 2, limiting native support to MTP-off or MTP-1. To enable MTP > 1, we offer two solutions. The long-term solution involves updating the MQA kernel to support larger sequence lengths, which will be introduced in the MQA kernel optimization section. The immediate workaround (in [PR-9045](https://github.com/NVIDIA/TensorRT-LLM/pull/9045)) uses the existing kernel by flattening the sequence length dimension into the batch dimension, treating the input as a tensor with a sequence length of 1. While this approach ignores the causal mask during the indexer MQA forward, causing discrepancies in the diagonal regions compared to ground truth, the subsequent Top-K kernel handles causal masking correctly. Therefore, the final Top-K indices remain unaffected, allowing this workaround to support MTP-N for any N.
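A shape-level sketch of this workaround (hypothetical tensor layout; the real kernel operates on paged caches):

```python
import torch

def flatten_for_mqa(q: torch.Tensor) -> torch.Tensor:
    """Workaround for an indexer MQA kernel that only supports q_seq_len <= 2:
    fold the draft-token dimension into the batch dimension, so each draft
    token is presented as an independent query of sequence length 1.

    q: [batch, q_seq_len, heads, dim], q_seq_len = MTP draft tokens + 1
    """
    b, s, h, d = q.shape
    return q.reshape(b * s, 1, h, d)

q = torch.randn(8, 4, 32, 64)       # MTP-3: 4 query tokens per request
print(flatten_for_mqa(q).shape)     # torch.Size([32, 1, 32, 64])
```

The diagonal scores computed this way ignore the causal mask, but since the subsequent Top-K kernel applies causal masking correctly, the final selected indices are unaffected.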
### Disaggregated Serving
Disaggregated serving decouples the prefill and decoding phases, allowing them to run on separate GPU pools with optimized parallel strategies. This feature is crucial for deploying LLMs on high-performance systems like the GB200 NVL72. However, it requires transferring KV cache blocks from the prefill to the decoding GPUs. DeepSeek-V3.2 introduces an additional 'indexer K cache,' which presents unique challenges for cache management and transmission in a disaggregated setup.
To address this, [PR-8699](https://github.com/NVIDIA/TensorRT-LLM/pull/8699) integrated indexer K cache support into the existing kvCacheManager, enabling it to inherit existing cache features. Subsequently, [PR-8735](https://github.com/NVIDIA/TensorRT-LLM/pull/8735) extended disaggregated serving capabilities to DeepSeek-V3.2, allowing TensorRT LLM to handle the transmission of the indexer K cache. Currently, the implementation specifically targets the indexer K cache, but we plan to generalize this support in future updates.
### Chunked Prefill and KV Cache Reuse
Two additional critical features are chunked prefill and KV cache reuse. Chunked prefill removes input length constraints for long prompts and enables prefill chunks to be batched alongside more decoding requests, boosting throughput. KV cache reuse allows requests sharing common prefixes (e.g., system prompts or multi-turn conversations) to share cached blocks, drastically reducing time-to-first-token (TTFT).
On the implementation side, kvCacheManager already supports the newly introduced indexer K cache, extending compatibility to both chunked prefill and KV cache reuse. Then [PR-9376](https://github.com/NVIDIA/TensorRT-LLM/pull/9376) enabled DSA to perform prefill computation with past tokens saved in the cache, thereby unlocking chunked prefill support. Building on this, [PR-9383](https://github.com/NVIDIA/TensorRT-LLM/pull/9383) implemented KV cache reuse for DeepSeek-V3.2 by reusing the chunked prefill changes.
### Wide Expert Parallelism (Wide-EP)
Wide-EP is an important feature for boosting inference throughput in large-scale Mixture-of-Experts (MoE) models. For the DeepSeek-V3.2 model, after disaggregated serving was supported, [PR-9245](https://github.com/NVIDIA/TensorRT-LLM/pull/9245) simply registered the model with the Expert Parallelism Load Balancer (EPLB). This integration allows Wide-EP and EPLB to be enabled, significantly enhancing performance.
### Chat Template and Tool Parser
DeepSeek-V3.2 introduces a new chat template compared to prior versions. This update incorporates support for tool calling and the 'thinking with tools' capability. These enhancements, along with the necessary tool parser, were implemented in [PR-9814](https://github.com/NVIDIA/TensorRT-LLM/pull/9814) and [PR-10126](https://github.com/NVIDIA/TensorRT-LLM/pull/10126). To enable this new chat template when deploying with `trtllm-serve` or `trtllm-eval`, please specify the argument `--custom_tokenizer deepseek_v32`.
## Key Optimizations
DeepSeek-V3.2 can inherit the MoE optimizations from DeepSeek-R1. Consequently, this section focuses exclusively on the DSA part, covering both kernel and system-level optimizations.
### Kernel Optimizations
#### Sparse MLA Kernel
Sparse MLA serves as the core kernel of DSA, enabling attention computation with fine-grained token sparsity. To efficiently support this sparsity pattern, we leverage the new TMALDG.Gather4 instruction on Blackwell GPUs. This instruction loads four rows from a source 2D tensor and coalesces them into a single destination tensor, making it ideal for fine-grained sparse attention operations.
Similar to the dense MLA kernel, FP8 KV cache optimization is crucial for reducing KV cache size and improving E2E throughput. For DSA, we employ per-tensor FP8 quantization: both Query (Q) and Key-Value (KV) tensors are quantized, and FP8 arithmetic is used for the sparse MLA computation. To validate model accuracy under this configuration, the table below presents the GPQA-Diamond accuracy comparison between BF16 and per-tensor FP8 KV cache for the DeepSeek-V3.2-Exp model. [PR-8692](https://github.com/NVIDIA/TensorRT-LLM/pull/8692) introduced this FP8 sparse MLA support, yielding up to a 47.03% improvement in throughput (TPS/GPU).
| KV Cache Type | FP8 checkpoint | NVFP4 checkpoint |
| :--------------------------- | :------------- | :--------------- |
| BF16 Sparse MLA and KV cache | 80.30 | 79.29 |
| FP8 Sparse MLA and KV cache | 78.28 | 80.30 |
Another important optimization is SwapsMmaAb, designed specifically for Tensor Parallelism (TP) scenarios. When TP is enabled for sparse MLA, input tensors are partitioned along the Q head dimension. Consequently, each rank processes a reduced number of Q heads ($128 / \text{TP}$), leading to Tensor Core underutilization. SwapsMmaAb addresses this bottleneck by swapping the A and B operands during matrix multiplication to improve hardware utilization.
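The swap rests on the identity $AB = (B^T A^T)^T$: when the Q-head dimension is small, computing the transposed product lets the larger dimension occupy the MMA's M extent. A toy NumPy illustration (shapes are illustrative only):

```python
import numpy as np

rng = np.random.default_rng(0)
M, K, N = 16, 576, 2048            # few Q heads (M) vs. many KV entries (N)
A = rng.normal(size=(M, K))        # Q-side operand
B = rng.normal(size=(K, N))        # KV-side operand

direct  = A @ B                    # tall-and-skinny MMA: M = 16 underutilizes
swapped = (B.T @ A.T).T            # same result, larger M extent for the MMA
assert np.allclose(direct, swapped)
```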
#### Indexer Top-K Kernel
DSA contains a module called the Top-K Selector: a fine-grained token selection mechanism that retrieves only the key-value entries corresponding to the Top-K index scores produced by the lightning indexer. This stage selects the top 2048 tokens for each query.
##### Deterministic Top-K vs Non-deterministic Top-K
The Top-K problem aims to find the largest (or smallest) K elements from a set of N candidates. Because some of the N candidates may have identical values, there can be more than K elements tied with the K-th element. In such cases, deciding which of the tied elements are included in the final Top-K set affects whether the output is deterministic. If the tied elements are selected randomly, the results will be nondeterministic. Conversely, if we always prioritize elements with smaller indices, the results will be deterministic.
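For example, a deterministic Top-K can be obtained by breaking ties toward smaller indices, as in this minimal NumPy sketch:

```python
import numpy as np

def topk_deterministic(x, k):
    """Top-k that always prefers the smaller index among tied values."""
    # lexsort treats the last key as primary: sort by descending value,
    # then by ascending index, so ties resolve to the earliest element.
    order = np.lexsort((np.arange(len(x)), -x))
    return np.sort(order[:k])

x = np.array([5.0, 3.0, 5.0, 5.0, 1.0])
print(topk_deterministic(x, 2))  # [0 2]: the tied 5.0s resolve by index
```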
Obtaining deterministic results generally requires a more complex algorithm and incurs higher latency than a nondeterministic version. For DeepSeek-V3.2, we first need to determine whether such determinism is actually necessary. We compared the accuracy of the deterministic (DE) and nondeterministic (non-DE) versions of Top-K on the GPQA-Diamond dataset. The scores are quite close:
| GPQA-Diamond | DE Top-K | Non-DE Top-K |
| :----------- | :------ | :---------- |
| FP8 model | 79.8 | 79.9 |
| NVFP4 model | 80.3 | 79.4 |
So we decided to use the non-DE parallel Top-K algorithm for DeepSeek-V3.2.
##### Radix-select-based Top-K Parallel Algorithm
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_radix_select_topk.png" alt="tech_blog15_radix_select_topk" width="1280" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 3. Radix-select-based Top-K.</em></sub></p>
In general, there are two kinds of parallel Top-K algorithms: partition-based methods and priority-queue-based methods. The runtime of existing priority-queue approaches grows rapidly as K increases, and K is as large as 2048 for the indexer Top-K in DeepSeek-V3.2, so we choose partition-based methods instead. Specifically, we adopt radix-select as our baseline.
For 32-bit values with 8-bit digits, a naïve radix Top-K algorithm runs 4 iterations, with 4 kernel launches per iteration. Each iteration performs:
1. **Histogram**: count how many elements fall into each digit bucket based on the current bits.
2. **Prefix Sum**: build a prefix sum over these bucket counts.
3. **Find target digit**: identify which bucket contains the K-th element.
4. **Filtering**: keep all elements in smaller buckets as definite Top-K, discard elements in larger buckets, and pass elements in the target bucket to the next iteration as new candidates.
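Below is a minimal NumPy sketch of this iteration, written for largest-K selection (for smallest-K the bucket orientation flips). The production kernel runs the four steps as separate GPU kernels on float bit patterns; this sketch uses plain integers:

```python
import numpy as np

def radix_pass(keys, k, shift):
    """One radix-select iteration over one 8-bit digit (largest-K).
    Returns (definite_topk, candidates_for_next_pass, k_remaining)."""
    digits = (keys >> shift) & 0xFF
    hist = np.bincount(digits, minlength=256)   # (1) histogram of digit buckets
    ge = np.cumsum(hist[::-1])[::-1]            # (2) prefix sum: ge[d] = #keys with digit >= d
    t = int(np.max(np.nonzero(ge >= k)[0]))     # (3) bucket holding the K-th largest key
    definite = keys[digits > t]                 # (4) filter: larger buckets are certainly in
    return definite, keys[digits == t], k - len(definite)

# Select the 2048 largest of 9295 random 31-bit keys in 4 iterations (MSB first).
rng = np.random.default_rng(0)
cands, k = rng.integers(0, 2**31, size=9295, dtype=np.int64), 2048
picked = [np.empty(0, dtype=np.int64)]
for shift in (24, 16, 8, 0):
    sure, cands, k = radix_pass(cands, k, shift)
    picked.append(sure)
picked.append(cands[:k])                        # survivors of the last pass are all tied
assert sum(map(len, picked)) == 2048
```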
##### Optimizations for Indexer Top-K
**Skip iterations with parallel sorting.** In addition to the basic radix-select method, we introduce further optimizations to speed up the Top-K computation. In practice, with either 8-bit radix select (four iterations) or 11-bit radix select (three iterations), the number of candidates typically drops sharply after the first one or two iterations on real datasets.
Our key optimization is to bypass the remaining radix-select iterations and switch to a parallel sort once the candidate set becomes sufficiently small (smaller than 2048 in the current implementation). When the number of candidates is relatively small, we use a low-overhead naive O(N²) comparison-based ranking algorithm: for each element, we compare it against all others to determine its final position, and if this position is smaller than K, we keep it as part of the Top-K output. Otherwise, we use the parallel sort from CUB to get the results. The basic implementation and this optimization were added in [PR-8882](https://github.com/NVIDIA/TensorRT-LLM/pull/8882).
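The comparison-based ranking stage can be sketched as follows (a toy sequential version; the real kernel assigns one thread per element, and the index tie-break here is only to make the sketch well-defined):

```python
import numpy as np

def rank_based_topk(vals, k):
    """O(N^2) ranking: an element is kept if fewer than k others beat it.
    'Beat' = strictly larger value, or an equal value at a smaller index."""
    n = len(vals)
    keep = []
    for i in range(n):
        rank = sum(1 for j in range(n)
                   if vals[j] > vals[i] or (vals[j] == vals[i] and j < i))
        if rank < k:
            keep.append(i)
    return np.array(keep)

vals = np.array([0.9, 0.1, 0.9, 0.5, 0.7])
print(rank_based_topk(vals, 3))  # [0 2 4]
```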
**Specialization for different cases.** When running with real datasets, we found that the number of candidates reaching the final sorting stage was larger than expected, which resulted in higher runtime overhead. To address this issue, [PR-9255](https://github.com/NVIDIA/TensorRT-LLM/pull/9255) introduced an additional preliminary bin-distribution step to reduce the number of candidates more efficiently before the final sort. This preprocessing step halves the candidate set and uses the leading 11 bits of each value to compute its bin index.
##### Performance Results
<sub><em>Table 1: Performance comparison of torch.topk and our customized Top-K op on B200.</em></sub>
| File | torch.topk(us) | TopKPerRow(us) | Speedup |
|-------------------------------|----------------|----------------|---------|
| topk_inputs_layer0_rank0.npy | 106.877 | 14.069 | 7.596 |
| topk_inputs_layer0_rank1.npy | 109.501 | 14.217 | 7.702 |
| topk_inputs_layer0_rank2.npy | 104.616 | 14.079 | 7.431 |
| topk_inputs_layer0_rank3.npy | 105.049 | 14.016 | 7.495 |
| topk_inputs_layer0_rank4.npy | 105.526 | 14.073 | 7.498 |
| topk_inputs_layer0_rank5.npy | 105.034 | 13.986 | 7.510 |
| topk_inputs_layer0_rank6.npy | 104.516 | 14.079 | 7.423 |
| topk_inputs_layer0_rank7.npy | 105.099 | 14.189 | 7.407 |
| topk_inputs_layer10_rank0.npy | 109.614 | 15.281 | 7.173 |
| topk_inputs_layer10_rank1.npy | 104.838 | 15.284 | 6.859 |
| Average | 106.067 | 14.327 | 7.410 |
We use data exported from real datasets across different layers. The input tensor size for each case is [64, 9295]. We select the top 2048 from the valid candidates for each query. As shown in Table 1, compared to the native torch.topk implementation, our implementation achieves an average speedup of 7.41x, significantly reducing the latency of the indexer module.
Overall, replacing the DE Top-K from PyTorch with our customized non-DE Top-K kernel brings 25%~40% and 14%~24% E2E speedups for the low-latency and throughput scenarios, respectively.
#### DeepGEMM MQA Kernel
The DeepGEMM MQA kernel computes logits for the Top-K selection process. To enhance efficiency on Blackwell GPUs, several optimizations were implemented targeting both performance and ease of use:
- Larger MMA Tile Size: We increased the MMA tile size for both the prefill and decoding MQA kernels, yielding up to a 10% performance improvement. This optimization was implemented in commit [2f9d878](https://github.com/deepseek-ai/DeepGEMM/commit/2f9d87877ed691a62796c25f2e9496a5e0b7123a) and [fc97232](https://github.com/deepseek-ai/DeepGEMM/commit/fc97232c6f23bf5b4be5bdef52af8ce5dc499460).
- Flexible Paged KV Cache Configurations: The decoding MQA kernel now supports a wider range of configurations. While the initial version was restricted to a block size of 64 tokens, commit [c5d4d74](https://github.com/deepseek-ai/DeepGEMM/commit/c5d4d7448665ae90a81d9d31d60d445010da50f0) enabled support for any block size $B$ satisfying $64 \bmod B = 0$.
- MTP-3 Support: Previously, the kernel was limited to MTP-0 or MTP-1 (predicting at most one draft token). Since MTP-3 typically delivers superior performance in low-latency scenarios, optimizations were introduced (see commit [2be3f36](https://github.com/deepseek-ai/DeepGEMM/commit/2be3f367854702e3887ff5b28b274cb16b441af9)) to enable native MTP-3 support.
#### Kernel Fusion
Kernel fusion is a standard optimization technique for improving performance. For DeepSeek-V3.2, we implemented specific fusion strategies:
- Custom Kernels for Indexer K Cache Population: The indexer MQA utilizes blockwise FP8 for both Q and K inputs, requiring the indexer K cache to store data in a specific blockwise FP8 format. During the forward pass, the indexer K tensor must be quantized, and both the values and scaling factors are saved to the cache. To optimize this, [PR-8701](https://github.com/NVIDIA/TensorRT-LLM/pull/8701) fused the blockwise FP8 quantization logic into a single kernel. Since the original PyTorch operations were a bottleneck, this resulted in a significant 32.64%–64.20% improvement in inference throughput. Subsequently, [PR-8960](https://github.com/NVIDIA/TensorRT-LLM/pull/8960) fused the indexer K tensor storage operations into a custom kernel, delivering an additional 3.5%–13.4% end-to-end (E2E) performance gain. (A sketch of the blockwise quantization layout appears after this list.)
- Fusing Small Kernels via torch.compile(): Beyond the major kernels, DSA involves numerous small kernels with low latencies. To reduce kernel launch overhead, we leverage torch.compile() to fuse these smaller operations:
- [PR-8988](https://github.com/NVIDIA/TensorRT-LLM/pull/8988) consolidated indexer weight scaling for blockwise FP8 quantization.
- [PR-9052](https://github.com/NVIDIA/TensorRT-LLM/pull/9052) fused LayerNorm operations, yielding around 1.42% speedup for low-latency scenarios and 1.90% for throughput-oriented scenarios.
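Here is a minimal PyTorch sketch of the blockwise FP8 layout referenced above: one scale per block of channels, values stored as `float8_e4m3fn`. This shows the math only; the fused production kernel additionally writes values and scales straight into the paged indexer K cache, and the block size here is illustrative.

```python
import torch

FP8_MAX = 448.0  # max finite value of torch.float8_e4m3fn

def quantize_blockwise_fp8(k: torch.Tensor, block: int = 128):
    """Toy blockwise FP8 quantization for an indexer K tensor.

    k: [tokens, dim] with dim divisible by `block`.
    Returns (fp8 values [tokens, dim], scales [tokens, dim // block]).
    """
    n, d = k.shape
    blocks = k.view(n, d // block, block)
    # One scale per block, chosen so the largest magnitude maps to FP8_MAX.
    scale = blocks.abs().amax(dim=-1, keepdim=True).clamp(min=1e-4) / FP8_MAX
    q = (blocks / scale).to(torch.float8_e4m3fn)
    return q.view(n, d), scale.squeeze(-1)

k = torch.randn(4, 256)
q, s = quantize_blockwise_fp8(k)
print(q.dtype, q.shape, s.shape)  # torch.float8_e4m3fn [4, 256] [4, 2]
```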
### System Optimizations
#### Multi-streams
Multi-stream execution is leveraged in the following optimizations:
- [PR-8988](https://github.com/NVIDIA/TensorRT-LLM/pull/8988) employs multi-stream to overlap indexer weight scaling with the indexer K cache update. Combined with torch.compile() optimization for the indexer weight scaling, this yields approximately 2.53% speedup in low-latency scenarios.
- When improving the blockwise FP8 quantization in [PR-8701](https://github.com/NVIDIA/TensorRT-LLM/pull/8701), multi-stream is also used to enable concurrent quantization of the indexer Q and K tensors.
- [PR-9243](https://github.com/NVIDIA/TensorRT-LLM/pull/9243) changed the indexer weight projection GEMM to FP32 to improve accuracy. However, this introduced a performance regression compared to the low-precision implementation. To mitigate this, multi-stream is utilized to overlap the FP32 weight projection GEMM with the indexer low-rank Q projection GEMM, LayerNorm, and Q/K RoPE operations.
#### A Fast Path for Short Sequences
DeepSeek-V3.2 employs K=2048 for the Top-K selector. For sequences with length $N \le 2048$, all past KV tokens are inherently selected, rendering the MQA and Top-K operations redundant. [PR-9524](https://github.com/NVIDIA/TensorRT-LLM/pull/9524) implements a "fast path" to bypass these unnecessary operations for short sequences.
For the implementation, we simply generate dense indices during DSA preparation and use them directly in the indexer forward pass for prefill requests. However, decoding requests present a challenge due to CUDA Graph integration, since CUDA graphs are usually enabled for decoding-only iterations. To ensure compatibility, we capture separate CUDA graphs for short and long sequences. At the start of each iteration, the system checks the sequence lengths: if any request in the batch exceeds the threshold, the long-sequence graph is triggered; otherwise, the short-sequence graph is used. This optimization yields approximately 1.03x speedup for 1K/1K scenarios.
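A sketch of the fast-path decision (hypothetical helper names; the production code selects between two separately captured CUDA graphs):

```python
import torch

TOPK = 2048

def dense_indices_or_none(seq_lens: torch.Tensor):
    """Fast path for short sequences: when every request's KV length is
    <= TOPK, the Top-K selector would pick all past tokens anyway, so we
    emit dense indices directly and skip the indexer MQA + Top-K kernels."""
    if int(seq_lens.max()) > TOPK:
        return None                      # long sequence present: run MQA + Top-K
    idx = torch.arange(TOPK).expand(len(seq_lens), -1).clone()
    idx[idx >= seq_lens[:, None]] = -1   # pad beyond each sequence's length
    return idx

print(dense_indices_or_none(torch.tensor([7, 5])))
```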
## How to Reproduce
This section provides the steps to reproduce our results on NVIDIA Blackwell B200 GPUs, covering both model accuracy evaluation and performance benchmarking.
The DeepSeek-V3.2 NVFP4 model is used for evaluation and benchmarking. You can follow [the Model Optimizer instructions](https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/deepseek#experimental-deepseek-v32) to quantize the original DeepSeek-V3.2 model to NVFP4.
### Accuracy Evaluation
Evaluate the model accuracy using trtllm-eval.
1. Prepare an advanced configuration file:
```
cat >./config.yml <<EOF
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
enable_attention_dp: true
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 1
EOF
```
2. Evaluate accuracy on the [MMLU](https://people.eecs.berkeley.edu/~hendrycks/data.tar) dataset:
```
model_path=<your model path>
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
mmlu
```
3. Evaluate accuracy on the [GSM8K](https://huggingface.co/datasets/openai/gsm8k) dataset:
```
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
gsm8k
```
4. Evaluate accuracy on the [GPQA-Diamond](https://huggingface.co/datasets/Idavidrein/gpqa) dataset:
```
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
gpqa_diamond \
--apply_chat_template \
--chat_template_kwargs '{"thinking": true}' \
--max_output_length 120000
```
### Benchmark on B200
#### Min-latency
Our benchmark results are based on Batch = 1, ISL = 8K, OSL = 1K, num_requests = 10 from a synthetic dataset.
To do the benchmark, run the following command:
```
data_path=<your dataset file following the format>
model_path=<your model path>
cat <<EOF > ./config.yml
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 3
EOF
trtllm-bench -m deepseek-ai/DeepSeek-V3.2-Exp \
--model_path ${model_path} throughput \
--tp 4 \
--warmup 1 \
--dataset ${data_path} \
--backend pytorch \
--max_batch_size 1 \
--max_num_tokens 8384 \
--kv_cache_free_gpu_mem_fraction 0.8 \
--concurrency 1 \
--config ./config.yml \
--num_requests 10 \
--streaming
```
The expected results:
```
===========================================================
= PERFORMANCE OVERVIEW
===========================================================
Request Throughput (req/sec): 0.2678
Total Output Throughput (tokens/sec): 274.1786
Total Token Throughput (tokens/sec): 2467.6070
Total Latency (ms): 37347.9238
Average request latency (ms): 3734.7334
Per User Output Throughput [w/ ctx] (tps/user): 276.2231
Per GPU Output Throughput (tps/gpu): 68.5446
Average time-to-first-token [TTFT] (ms): 425.9885
Average time-per-output-token [TPOT] (ms): 3.2344
Per User Output Speed (tps/user): 312.0708
```
<sub><em>\* Note that `max_num_tokens` is set to a large value to cover the maximum sequence length. Please refer to the [Best Performance Practices](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md#wip-enable-more-features-by-default) for more details on `max_num_tokens` configuration.</em></sub>
#### Max-throughput
Our benchmark results are based on Batch = 256, ISL = 8K, OSL = 1K, num_requests = 768 from a synthetic dataset.
To do the benchmark, run the following command:
```
data_path=<your dataset file following the format>
model_path=<your model path>
cat <<EOF > ./config.yml
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
enable_attention_dp: true
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 1
EOF
trtllm-bench -m deepseek-ai/DeepSeek-V3.2-Exp \
--model_path ${model_path} throughput \
--tp 8 \
--ep 8 \
--warmup 1 \
--dataset ${data_path} \
--backend pytorch \
--max_batch_size 256 \
--max_num_tokens 8576 \
--kv_cache_free_gpu_mem_fraction 0.8 \
--concurrency 256 \
--config ./config.yml \
--num_requests 768 \
--streaming
```
The expected results:
```
===========================================================
= PERFORMANCE OVERVIEW
===========================================================
Request Throughput (req/sec): 8.4162
Total Output Throughput (tokens/sec): 8618.2158
Total Token Throughput (tokens/sec): 77563.9425
Total Latency (ms): 365009.1921
Average request latency (ms): 120325.7013
Per User Output Throughput [w/ ctx] (tps/user): 9.8876
Per GPU Output Throughput (tps/gpu): 1077.2770
Average time-to-first-token [TTFT] (ms): 19537.7776
Average time-per-output-token [TPOT] (ms): 98.5219
Per User Output Speed (tps/user): 11.2591
```
### Benchmark with Wide-EP on GB200
To validate the efficacy of Wide-EP on DeepSeek-V3.2, we evaluated performance using the NVFP4 model on a GB200 NVL72 system. We compared EP16 and EP32 configurations against EP4 and EP8 baselines, with benchmarks conducted at ISL=8K and OSL=1K using the [Rate Matching](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog5_Disaggregated_Serving_in_TensorRT-LLM.md#measurement-methodology) methodology.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_ds32_wide_ep.png" alt="tech_blog15_ds32_wide_ep" width="700" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 4. DeepSeek-V3.2 throughput on ISL/OSL 8k/1k. Note that the numbers were collected on November 20th, and more optimizations are still on-going.</em></sub></p>
As illustrated in Figure 4, Wide-EP yields up to a 2.28x improvement in per-GPU output throughput. To reproduce these results, please refer to the [examples/wide_ep/slurm_scripts](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/wide_ep/slurm_scripts) directory. These scripts demonstrate how to launch disaggregated serving with large-scale EP and associated features on a SLURM cluster.
## Future Works
- Optimize performance for long-sequence scenarios (e.g., ISL=32K, OSL=4K).
- Optimize performance for large Expert Parallelism (EP) configurations.
- Evaluate dense MHA versus MQA modes for context sparse MLA to determine the optimal configuration for processing short sequences.
- Explore more aggressive quantization strategies for DSA.
- Optimize the implementation of the indexer Top-K kernel.
- Investigate KV cache offloading mechanisms for DSA.
## Acknowledgement
Achieving these remarkable performance gains since the release of DeepSeek-V3.2-Exp was truly a collaborative triumph. We extend our deepest gratitude to everyone who contributed to the functional implementation and performance optimization of the DeepSeek-V3.2 model.
This work serves as a testament to TensorRT LLM's flexibility and effectiveness in supporting architectural innovations and novel sparse attention mechanisms. We hope this work paves the way for further advancements in sparse attention support.

View File

@ -227,3 +227,7 @@ Run `bench.sh` to begin a serving benchmark. This will take a long time if you r
```shell
./bench.sh
```
## Known Issues
Qwen3-Next-80B-A3B exhibits relatively low accuracy on the SciCode-AA-v2 benchmark.

View File

@ -38,13 +38,14 @@ Note: Support for other models may vary. Features marked "N/A" are not applicabl
| `DeepseekV3ForCausalLM` | Yes | Yes | Yes | Yes | Yes [^1] | Yes | No | No | Yes | Yes | Yes [^2] | N/A | Yes | Yes |
| `DeepseekV32ForCausalLM` | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3MoeForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3NextForCausalLM` | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
| `Qwen3NextForCausalLM` [^3] | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
| `Llama4ForConditionalGeneration` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Untested | N/A | Yes | Yes |
| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^3] | Yes | Yes | Yes | N/A | Yes | Yes |
| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^4] | Yes | Yes | Yes | N/A | Yes | Yes |
[^1]: Chunked Prefill for MLA can only be enabled on SM100/SM103.
[^2]: KV cache reuse for MLA can only be enabled on SM90/SM100/SM103 and in BF16/FP8 KV cache dtype.
[^3]: Overlap scheduler isn't supported when using EAGLE-3(Two Model Engine) for GPT-OSS.
[^3]: Qwen3-Next-80B-A3B exhibits relatively low accuracy on the SciCode-AA-v2 benchmark.
[^4]: Overlap scheduler isn't supported when using EAGLE-3(Two Model Engine) for GPT-OSS.
# Multimodal Feature Support Matrix (PyTorch Backend)

View File

@ -0,0 +1 @@
attn_backend: triton

View File

@ -65,7 +65,7 @@ models:
- name: bigcode/starcoder2-7b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: bigcode/starcoder2-15b-instruct-v0.1
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml']
- name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: deepseek-ai/DeepSeek-Prover-V2-7B
@ -118,8 +118,6 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-27b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-2b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: deepseek-ai/DeepSeek-V2.5
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: Network timeout downloading from Hugging Face
@ -145,8 +143,6 @@ models:
# DISABLED: Graph transformation error in auto-deploy
# - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: TheBloke/falcon-40b-instruct-GPTQ
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: google/gemma-2-27b-it
@ -159,7 +155,7 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B-Preview
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: Qwen/Qwen3-Coder-32B-Instruct
- name: Qwen/Qwen3-Coder-30B-A3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/Qwen3-235B-A22B-Instruct-2507
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
@ -222,3 +218,5 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_maverick_lite.yaml']
- name: nvidia/NVIDIA-Nemotron-3-Super-120B-BF16-BF16KV-010726
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml','super_v3.yaml']

View File

@ -10,10 +10,11 @@ import torch
import yaml
from tensorrt_llm._torch.autotuner import AutoTuner, autotune
from tensorrt_llm._torch.distributed import MPIDist, TorchDist
from tensorrt_llm._torch.modules.fused_moe.fused_moe_cutlass import CutlassFusedMoE
from tensorrt_llm._torch.modules.fused_moe.interface import AlltoallMethodType
from tensorrt_llm._torch.modules.multi_stream_utils import with_multi_stream
from tensorrt_llm._utils import local_mpi_rank, mpi_rank, mpi_world_size
from tensorrt_llm._utils import local_mpi_rank, mpi_disabled, mpi_rank, mpi_world_size
from tensorrt_llm.logger import logger
from tensorrt_llm.tools.layer_wise_benchmarks import BalanceMethod, get_runner_cls, mark_ranges
@ -173,6 +174,8 @@ run_pack = runner.create_run_pack(
)
if args.enable_autotuner:
cache_path = os.getenv("TLLM_AUTOTUNER_CACHE_PATH") or None
dist = TorchDist(mapping=mapping) if mpi_disabled() else MPIDist(mapping=mapping)
AutoTuner.get().setup_distributed_state(mapping, dist)
with autotune(cache_path=cache_path):
run_pack()
else:

View File

@ -1,9 +1,6 @@
# EXAONE
This document shows how to build and run a [EXAONE](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) model in TensorRT-LLM.
The TensorRT LLM EXAONE implementation is based on the LLaMA model. The implementation can be found in [llama/model.py](../../../../tensorrt_llm/models/llama/model.py).
See the LLaMA example [`examples/models/core/llama`](../llama) for details.
This document shows how to build and run [EXAONE](https://huggingface.co/LGAI-EXAONE) models in TensorRT-LLM.
- [EXAONE](#exaone)
- [Support Matrix](#support-matrix)
@ -11,31 +8,51 @@ See the LLaMA example [`examples/models/core/llama`](../llama) for details.
- [EXAONE-3.0](#exaone-30)
- [EXAONE-Deep](#exaone-deep)
- [EXAONE-4.0](#exaone-40)
- [Usage](#usage)
- [PyTorch flow](#pytorch-flow)
-[PyTorch flow Quantization](#pytorch-flow-quantization)
- [TRT Flow](#trt-flow)
- [K-EXAONE](#k-exaone)
- [PyTorch flow](#pytorch-flow)
- [Running EXAONE-4.0](#running-exaone-40)
- [Running K-EXAONE](#running-k-exaone)
- [MoE Backend Options](#moe-backend-options)
- [PyTorch flow Quantization](#pytorch-flow-quantization)
- [FP8 Quantization](#fp8-quantization)
- [NVFP4 Quantization](#nvfp4-quantization)
- [Running the TensorRT LLM Server](#running-the-tensorrt-llm-server)
- [Running Aggregated TensorRT LLM Server](#running-aggregated-tensorrt-llm-server)
- [Creating the Extra Options Configuration](#creating-the-extra-options-configuration)
- [Launch trtllm-serve OpenAI-compatible API server](#launch-trtllm-serve-openai-compatible-api-server)
- [Running Disaggregated TensorRT LLM Server](#running-disaggregated-tensorrt-llm-server)
- [Step 1: Set Environment Variables](#step-1-set-environment-variables)
- [Step 2: Create Configuration Files](#step-2-create-configuration-files)
- [Step 3: Launch the Disaggregated Server](#step-3-launch-the-disaggregated-server)
- [TRT flow](#trt-flow)
- [Convert checkpoint and build TensorRT engine(s)](#convert-checkpoint-and-build-tensorrt-engines)
- [FP8 Post-Training Quantization](#fp8-post-training-quantization)
- [SmoothQuant](#smoothquant)
- [Groupwise quantization (AWQ)](#groupwise-quantization-awq)
- [W4A16 AWQ with FP8 GEMM (W4A8 AWQ)](#w4a16-awq-with-fp8-gemm-w4a8-awq)
- [W4A16 AWQ with FP8 GEMM (W4A8 AWQ)](#w4a16-awq-with-fp8-gemm-w4a8-awq)
- [Run Engine](#run-engine)
- [Troubleshootings](#troubleshootings)
- [Troubleshootings for EXAONE-4.0](#troubleshootings-for-exaone-40)
- [Troubleshootings for K-EXAONE](#troubleshootings-for-k-exaone)
## Support Matrix
* FP16
* BF16
* Tensor Parallel
* Tensor Parallel (TP)
* Expert Parallel (EP) (K-EXAONE only)
* Attention Data Parallel (ADP) (K-EXAONE only)
* Disaggregated Serving
* FP8
* INT8 & INT4 Weight-Only
* INT8 SmoothQuant
* INT4 AWQ & W4A8 AWQ
* NVFP4 (K-EXAONE only)
## Supported Models
**Note:**
- **EXAONE-3.0** and **EXAONE-Deep** are supported using the [TRT Flow](#trt-flow).
- **EXAONE-4.0** is supported using the [PyTorch flow](#pytorch-flow).
**Note:**
- **EXAONE-3.0** & **EXAONE-Deep** are supported using the [TRT Flow](#trt-flow).
- **EXAONE-4.0** & **K-EXAONE** are supported using the [PyTorch flow](#pytorch-flow).
Please refer to the corresponding sections below for usage instructions and examples for each model.
@ -59,23 +76,33 @@ git clone https://huggingface.co/LGAI-EXAONE/EXAONE-Deep-2.4B $HF_MODEL_DIR
### EXAONE-4.0
Download he HuggingFace checkpoints of EXAONE-4.0 model. Here, we only use the `EXAONE-4.0-32B` model for the example. From EXAONE-4.0 model, we support only on PyTorch flow.
Download the HuggingFace checkpoints of the EXAONE-4.0 model. Here, we use the `EXAONE-4.0-32B` model as an example. EXAONE-4.0 is supported only via the PyTorch flow.
```bash
export HF_MODEL_DIR=hf_models/exaone4
git clone https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B $HF_MODEL_DIR
```
### Pytorch flow
### K-EXAONE
K-EXAONE is a Mixture of Experts (MoE) model based on the EXAONE architecture. It features a hybrid architecture with both dense and MoE layers and sliding window attention, and it supports FP8 and NVFP4 quantization for efficient inference.
Download the HuggingFace checkpoints of the K-EXAONE model:
```bash
export HF_MODEL_DIR=hf_models/kexaone
git clone https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B $HF_MODEL_DIR
```
## PyTorch flow
### Running EXAONE-4.0
To quickly run EXAONE-4.0 models, you can use [examples/llm-api/quickstart_advanced.py](../../../llm-api/quickstart_advanced.py):
```bash
python ../../../llm-api/quickstart_advanced.py --model_dir hf_models/$MODEL_NAME --disable_kv_cache_reuse
python ../../../llm-api/quickstart_advanced.py --model_dir $HF_MODEL_DIR
```
SWA currently does not support kv_cache_reuse. Please make sure to disable KV cache reuse when running with SWA.
The output will be like:
```bash
[0] Prompt: 'Hello, my name is', Generated text: " [Your Name], and I'm a [Your Profession]. I'm here to learn and share with you.\n\nBest regards,\n[Your Name]\n\nThis letter is concise, professional, and clearly states who you are and what you're here for. It's a good starting point"
@ -83,47 +110,239 @@ The output will be like:
[2] Prompt: 'The future of AI is', Generated text: ' not just about technology but also about how we choose to use it. We must ensure that AI is developed and deployed in a way that benefits all of humanity, not just a select few. This means prioritizing ethical considerations, transparency, and accountability in AI development. It also means involving diverse stakeholders in the conversation about AI'
```
#### PyTorch flow Quantization
### Running K-EXAONE
For PyTorch flow, TRT-LLM supports quantized format generated by [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
You can either do pre-quantized models in HF model hub, or can generate quantized model by yourself and then run models with below command:
K-EXAONE is a Mixture of Experts model that benefits from multiple parallelism strategies. You can run it with tensor parallelism (TP), expert parallelism (EP), and attention data parallelism (ADP):
```bash
git clone https://github.com/NVIDIA/Model-Optimizer.git
python ../../../llm-api/quickstart_advanced.py \
--model_dir $HF_MODEL_DIR \
--tp_size 8 \
--moe_ep_size 8 \
--enable_attention_dp \
--trust_remote_code
```
The output will be like:
```bash
[0] Prompt: 'Hello, my name is', Generated text: ' John Smith, and I am a 28-year-old software developer. I live in the city of San Francisco, California. I work remotely for a tech startup based in Austin, Texas.\n\nI enjoy hiking, reading, and playing the piano. In my free time, I often explore new neighborhoods in San Francisco, trying out new restaurants and cafes.\n\n'
[1] Prompt: 'The capital of France is', Generated text: ' Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris'
[2] Prompt: 'The future of AI is', Generated text: ' bright.\n</think>\n\nThe future of AI holds immense promise across numerous domains. In healthcare, AI is revolutionizing diagnostics, drug discovery, and personalized treatment plans. In education, AI is enabling adaptive learning platforms that cater to individual learning styles and paces. In environmental science, AI is playing a pivotal role in addressing climate change by optimizing'
```
#### MoE Backend Options
K-EXAONE supports the following MoE backends:
| Backend | Description |
|---------|-------------|
| `CUTLASS` | Default backend, optimized for general use cases |
| `TRTLLM` | TensorRT-LLM backend using TRT-LLM Gen kernels, optimized for low-latency inference |
| `WIDEEP` | Wide expert parallelism backend for cases where EP size exceeds the number of experts |
You can specify the MoE backend using the `--moe_backend` argument:
```bash
python ../../../llm-api/quickstart_advanced.py \
--model_dir $HF_MODEL_DIR \
--tp_size 8 \
--moe_ep_size 8 \
--enable_attention_dp \
--moe_backend CUTLASS \
--trust_remote_code
```
### PyTorch flow Quantization
For PyTorch flow, TRT-LLM supports quantized formats generated by [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer). You can either use pre-quantized models from the HuggingFace model hub, or generate quantized models yourself using the instructions below.
First, clone the [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) repository:
```bash
git clone https://github.com/NVIDIA/Model-Optimizer
cd Model-Optimizer/examples/llm_ptq
scripts/huggingface_example.sh --model hf_models/$MODEL_NAME --quant fp8 --export_fmt hf
```
For more information, please refer to official [docs](https://github.com/NVIDIA/Model-Optimizer) or [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
For more information, please refer to the official [Model Optimizer documentation](https://github.com/NVIDIA/Model-Optimizer).
Troubleshooting
#### FP8 Quantization
FP8 quantization provides a good balance between model accuracy and inference performance. To quantize a model to FP8 format:
The following error may occur during quantization:
```bash
torch._dynamo.exc.Unsupported: Graph break under GenericContextWrappingVariable
Explanation: Attempted to graph break in an active context manager(s) that doesn't support graph breaking.
Hint: Move the offending context manager(s) to outside the compiled region.
Hint: This graph break may have been caused by an earlier graph break. Resolving the earlier graph break may resolve this one.
python3 hf_ptq.py --model $HF_MODEL_DIR --quant fp8 --export_fmt hf
```
This error may indicate an incompatibility between `torch.compile()` and the `HybridCache` module of the transformers library. As a result, [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (ModelOpt) cannot perform PTQ with HybridCache.
#### NVFP4 Quantization
Temporarily switching to `DynamicCache` when creating PTQ models could help address the issue. This can be done by updating the `cache_implementation` field in the `generation_config.json` file located in the model checkpoint directory, for example:
```json
# generation_config.json
{
// Change "hybrid" to "dynamic" to run PTQ.
// Revert this to "hybrid" after quantization is complete.
"cache_implementation": "hybrid",
...
}
NVFP4 (4-bit floating point) quantization enables memory-efficient inference with reduced GPU memory footprint. To quantize a model to NVFP4 format:
```bash
python3 hf_ptq.py --model $HF_MODEL_DIR --quant nvfp4 --export_fmt hf
```
For models with sliding window attention, DynamicCache is less memory-efficient than HybridCache because it retains the entire key-value cache. However, this does not break the model's attention logic, as the cache implementation is separated from the attention computation itself. This trade-off is acceptable for the PTQ process, which is a one-time procedure. Our tests confirm that this workaround does not degrade accuracy on MMLU or GSM8K benchmarks with the default ModelOpt settings.
### TRT flow
## Running the TensorRT LLM Server
The next section describe how to convert the weights from the [HuggingFace (HF) Transformers](https://github.com/huggingface/transformers) format to the TensorRT LLM format. We will use llama's [convert_checkpoint.py](../llama/convert_checkpoint.py) for EXAONE model and then we build the model with `trtllm-build`.
This section describes how to deploy the K-EXAONE model using the TensorRT LLM server with an OpenAI-compatible API endpoint.
Make sure `HF_MODEL_DIR` points to your EXAONE checkpoint directory.
The examples in this section are intended as a minimal, runnable demonstration and are not fully performance-optimized. For more features and performance tuning, please refer to the documents below.
- [Disaggregated Serving examples](../../../disaggregated/README.md)
- [Disaggregated Serving feature guide](../../../../docs/source/features/disagg-serving.md)
- [Recommended LLM API configuration settings](../../../configs/README.md) (see also `examples/configs/curated/`)
### Running Aggregated TensorRT LLM Server
The aggregated server runs all components (context and generation phases) on the same set of GPUs, which is suitable for single-node deployments.
#### Creating the Extra Options Configuration
Create a YAML configuration file to specify advanced options such as attention data parallelism, CUDA graph settings, and MoE backend configuration:
```bash
cat <<EOF > configs.yaml
enable_attention_dp: true
trust_remote_code: true
cuda_graph_config:
max_batch_size: 2048
enable_padding: true
moe_config:
backend: CUTLASS # The TRTLLM backend is recommended for the Blackwell architecture.
kv_cache_config:
enable_block_reuse: true # Please disable the block reuse feature when conducting performance benchmarking.
max_attention_window: [128, 128, 128, 131072] # This allows KV cache manager to possibly improve memory efficiency.
free_gpu_memory_fraction: 0.9
dtype: "auto"
attention_dp_config:
enable_balance: true
batching_wait_iters: 50
timeout_iters: 1
num_postprocess_workers: 4 # Can mitigate the postprocessing overhead (e.g. detokenization)
EOF
```
#### Launch trtllm-serve OpenAI-compatible API server
Start the server using `trtllm-serve` with the PyTorch backend. This launches an OpenAI-compatible API server that can handle chat completions and text generation requests:
```bash
trtllm-serve \
$HF_MODEL_DIR \
--host localhost \
--port 8000 \
--backend pytorch \
--max_batch_size 2048 \
--max_num_tokens 8192 \
--tp_size 8 \
--ep_size 8 \
--pp_size 1 \
--config ./configs.yaml
```
Once the server is running, you can send requests to `http://localhost:8000/v1/completions` using the OpenAI API format.
### Running Disaggregated TensorRT LLM Server
Disaggregated serving separates the context (prefill) and generation (decode) phases onto different GPU sets, enabling better resource utilization and improved throughput. This example demonstrates a single-node disaggregated deployment using 8 GPUs (4 for context, 4 for generation). For more details, see the [Disaggregated Serving documentation](../../../disaggregated/README.md).
#### Step 1: Set Environment Variables
Configure the parallelism and buffer settings:
```bash
# Buffer size for KV cache transfer between context and generation servers
export MAX_TOKENS_IN_BUFFER=8192
# Model parallelism configuration
export TP_SIZE=4
export MOE_EP_SIZE=4
export ENABLE_ATTENTION_DP=true
```
#### Step 2: Create Configuration Files
**Context server configuration (`ctx_extra-llm-api-config.yaml`):**
```bash
cat > ctx_extra-llm-api-config.yaml << EOF
backend: pytorch
trust_remote_code: true
disable_overlap_scheduler: true
enable_chunked_prefill: true
tensor_parallel_size: $TP_SIZE
moe_expert_parallel_size: $MOE_EP_SIZE
pipeline_parallel_size: 1
enable_attention_dp: $ENABLE_ATTENTION_DP
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: $MAX_TOKENS_IN_BUFFER
EOF
```
**Generation server configuration (`gen_extra-llm-api-config.yaml`):**
```bash
cat > gen_extra-llm-api-config.yaml << EOF
backend: pytorch
trust_remote_code: true
disable_overlap_scheduler: false
enable_chunked_prefill: true
tensor_parallel_size: $TP_SIZE
moe_expert_parallel_size: $MOE_EP_SIZE
pipeline_parallel_size: 1
enable_attention_dp: $ENABLE_ATTENTION_DP
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: $MAX_TOKENS_IN_BUFFER
EOF
```
**Disaggregated orchestrator configuration (`disagg_config.yaml`):**
```bash
cat > disagg_config.yaml << EOF
hostname: localhost
port: 8000
backend: pytorch
context_servers:
num_instances: 1
urls:
- "localhost:8001"
generation_servers:
num_instances: 1
urls:
- "localhost:8002"
EOF
```
#### Step 3: Launch the Disaggregated Server
Start all components in the following order:
```bash
# 1. Start context server (GPUs 0-3)
CUDA_VISIBLE_DEVICES=0,1,2,3 trtllm-serve $HF_MODEL_DIR \
--host localhost --port 8001 --enable_chunked_prefill \
--extra_llm_api_options ./ctx_extra-llm-api-config.yaml &> log_ctx.log &
# 2. Start generation server (GPUs 4-7)
CUDA_VISIBLE_DEVICES=4,5,6,7 trtllm-serve $HF_MODEL_DIR \
--host localhost --port 8002 --enable_chunked_prefill \
--extra_llm_api_options ./gen_extra-llm-api-config.yaml &> log_gen.log &
# 3. Start disaggregated orchestrator
trtllm-serve disaggregated -c disagg_config.yaml -t 360 -r 1200 &> log_disagg.log &
```
Once all servers are running, you can send requests to `http://localhost:8000/v1/completions` using the OpenAI API format.
## TRT flow
The next section describes how to convert weights from the [HuggingFace (HF) Transformers](https://github.com/huggingface/transformers) format to the TensorRT LLM format. We will use LLaMA's [convert_checkpoint.py](../llama/convert_checkpoint.py) for EXAONE models and then build the model with `trtllm-build`.
### Convert checkpoint and build TensorRT engine(s)
@ -141,7 +360,7 @@ trtllm-build \
--output_dir trt_engines/exaone/fp16/1-gpu \
--gemm_plugin auto
# Build the EXAONE model using a single GPU and and apply INT8 weight-only quantization.
# Build the EXAONE model using a single GPU and apply INT8 weight-only quantization.
python ../llama/convert_checkpoint.py \
--model_dir $HF_MODEL_DIR \
--output_dir trt_models/exaone/int8_wq/1-gpu \
@ -154,7 +373,7 @@ trtllm-build \
--output_dir trt_engines/exaone/int8_wq/1-gpu \
--gemm_plugin auto
# Build the EXAONE model using a single GPU and and apply INT4 weight-only quantization.
# Build the EXAONE model using a single GPU and apply INT4 weight-only quantization.
python ../llama/convert_checkpoint.py \
--model_dir $HF_MODEL_DIR \
--output_dir trt_models/exaone/int4_wq/1-gpu \
@ -183,18 +402,18 @@ trtllm-build \
### FP8 Post-Training Quantization
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply FP8 quantization.
# Build the EXAONE model using a single GPU and apply FP8 quantization.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
--qformat fp8 \
--kv_cache_dtype fp8 \
--output_dir trt_models/exaone/fp8/1-gpu \
--output_dir trt_models/exaone/fp8/1-gpu
trtllm-build \
--checkpoint_dir trt_models/exaone/fp8/1-gpu \
@ -204,12 +423,12 @@ trtllm-build \
### SmoothQuant
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply INT8 SmoothQuant.
# Build the EXAONE model using a single GPU and apply INT8 SmoothQuant.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -224,12 +443,12 @@ trtllm-build \
### Groupwise quantization (AWQ)
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply INT4 AWQ.
# Build the EXAONE model using a single GPU and apply INT4 AWQ.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -248,7 +467,7 @@ For Hopper GPUs, TRT-LLM also supports employing FP8 GEMM for accelerating linea
Please make sure your system contains a Hopper GPU before trying the commands below.
```bash
# Build the EXAONE model using a single GPU and and apply W4A8 AWQ.
# Build the EXAONE model using a single GPU and apply W4A8 AWQ.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -287,4 +506,50 @@ python ../../../summarize.py \
--engine_dir trt_engines/exaone/fp16/1-gpu
```
For more examples see [`examples/models/core/llama/README.md`](../llama/README.md)
For more examples regarding EXAONE-3.0 & EXAONE-Deep's TRT flow, see [`examples/models/core/llama/README.md`](../llama/README.md)
## Troubleshooting
### Troubleshooting for EXAONE-4.0
The following error may occur during quantization:
```bash
torch._dynamo.exc.Unsupported: Graph break under GenericContextWrappingVariable
Explanation: Attempted to graph break in an active context manager(s) that doesn't support graph breaking.
Hint: Move the offending context manager(s) to outside the compiled region.
Hint: This graph break may have been caused by an earlier graph break. Resolving the earlier graph break may resolve this one.
```
This error may indicate an incompatibility between `torch.compile()` and the `HybridCache` module of the transformers library. As a result, [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (ModelOpt) cannot perform PTQ with HybridCache.
Temporarily switching to `DynamicCache` when creating PTQ models could help address the issue. This can be done by updating the `cache_implementation` field in the `generation_config.json` file located in the model checkpoint directory, for example:
```json
// generation_config.json
{
// Change "hybrid" to "dynamic" to run PTQ.
// Revert this to "hybrid" after quantization is complete.
"cache_implementation": "hybrid",
...
}
```
For models with sliding window attention, DynamicCache is less memory-efficient than HybridCache because it retains the entire key-value cache. However, this does not break the model's attention logic, as the cache implementation is separated from the attention computation itself. This trade-off is acceptable for the PTQ process, which is a one-time procedure. Our tests confirm that this workaround does not degrade accuracy on MMLU or GSM8K benchmarks with the default ModelOpt settings.
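If you prefer not to edit the file by hand, a minimal sketch like the following can automate the toggle (this assumes `jq` is installed and `$HF_MODEL_DIR` points to the checkpoint directory):
```bash
# Back up the original config, then switch the cache implementation for PTQ.
cp $HF_MODEL_DIR/generation_config.json $HF_MODEL_DIR/generation_config.json.bak
jq '.cache_implementation = "dynamic"' $HF_MODEL_DIR/generation_config.json.bak \
    > $HF_MODEL_DIR/generation_config.json
# After quantization completes, restore the original setting:
# mv $HF_MODEL_DIR/generation_config.json.bak $HF_MODEL_DIR/generation_config.json
```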
### Troubleshooting for K-EXAONE
K-EXAONE is a Mixture of Experts (MoE) model that activates 8 experts per token. When too few tokens are provided during PTQ, some experts in some layers may never be activated and will not produce properly calibrated weights.
To address this issue, provide enough data samples during calibration by increasing the `calib_size` and `calib_seq` parameters:
**FP8 Quantization:**
```bash
cd Model-Optimizer/examples/llm_ptq
python3 hf_ptq.py --model hf_models/$MODEL_NAME --quant fp8 --export_fmt hf --calib_size 8192 --calib_seq 1024
```
**NVFP4 Quantization:**
```bash
cd Model-Optimizer/examples/llm_ptq
python3 hf_ptq.py --model hf_models/$MODEL_NAME --quant nvfp4 --export_fmt hf --calib_size 8192 --calib_seq 1024
```
View File
@@ -10,7 +10,7 @@ tiktoken
einops
# optional dependencies
gradio==4.44.1
gradio==5.4.0
mdtex2html
sse_starlette
aiohttp_sse_client
View File
@@ -1155,7 +1155,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
export pytestCommand="$pytestCommand"
export coverageConfigFile="$coverageConfigFile"
export NVIDIA_IMEX_CHANNELS=\${NVIDIA_IMEX_CHANNELS:-0}
export NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=noheader)-1)))}
export NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=csv,noheader)-1)))}
${envExportStatements}
echo "Env NVIDIA_IMEX_CHANNELS: \$NVIDIA_IMEX_CHANNELS"
@@ -3249,10 +3249,12 @@ def launchTestJobs(pipeline, testFilter)
fullSet = parallelJobs.keySet()
x86SlurmTestConfigs = [
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 2, 2],
"DGX_H100-2_GPUs-PyTorch-Others-2": ["dgx-h100-x2-oci", "l0_dgx_h100", 2, 2, 2],
"DGX_H100-2_GPUs-PyTorch-GptOss-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-2_GPUs-PyTorch-Ray-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 2, 4],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4-oci", "l0_dgx_h100", 2, 2, 4],
"DGX_H100-4_GPUs-PyTorch-GptOss-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Ray-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
View File
@@ -0,0 +1,111 @@
@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _
import java.lang.InterruptedException
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202510291120-8621"
// LLM repository configuration
withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LLM_REPO')]) {
LLM_REPO = env.gitlabSourceRepoHttpUrl ? env.gitlabSourceRepoHttpUrl : "${DEFAULT_LLM_REPO}"
}
LLM_ROOT = "llm"
def createKubernetesPodConfig(image, arch = "amd64")
{
def archSuffix = arch == "arm64" ? "arm" : "amd"
def jnlpImage = "urm.nvidia.com/sw-ipp-blossom-sre-docker-local/lambda/custom_jnlp_images_${archSuffix}_linux:jdk17"
def podConfig = [
cloud: "kubernetes-cpu",
namespace: "sw-tensorrt",
yaml: """
apiVersion: v1
kind: Pod
spec:
nodeSelector:
nvidia.com/node_type: builder
kubernetes.io/os: linux
containers:
- name: trt-llm
image: ${image}
command: ['cat']
volumeMounts:
- name: sw-tensorrt-pvc
mountPath: "/mnt/sw-tensorrt-pvc"
readOnly: false
tty: true
resources:
requests:
cpu: 2
memory: 5Gi
ephemeral-storage: 25Gi
limits:
cpu: 2
memory: 5Gi
ephemeral-storage: 25Gi
imagePullPolicy: Always
- name: jnlp
image: ${jnlpImage}
args: ['\$(JENKINS_SECRET)', '\$(JENKINS_NAME)']
resources:
requests:
cpu: '2'
memory: 5Gi
ephemeral-storage: 25Gi
limits:
cpu: '2'
memory: 5Gi
ephemeral-storage: 25Gi
qosClass: Guaranteed
volumes:
- name: sw-tensorrt-pvc
persistentVolumeClaim:
claimName: sw-tensorrt-pvc
""".stripIndent(),
]
return podConfig
}
pipeline {
agent {
kubernetes createKubernetesPodConfig(DOCKER_IMAGE)
}
options {
timestamps()
}
environment {
OPEN_SEARCH_DB_BASE_URL=credentials("open_search_db_base_url")
OPEN_SEARCH_DB_CREDENTIALS=credentials("open_search_db_credentials")
}
parameters {
string(name: "BRANCH", defaultValue: "main", description: "Branch to checkout.")
string(name: "OPEN_SEARCH_PROJECT_NAME", defaultValue: "swdl-trtllm-infra-ci-prod-perf_sanity_info", description: "OpenSearch project name.")
string(name: "OPERATION", defaultValue: "SLACK BOT SENDS MESSAGE", description: "Operation to perform.")
string(name: "QUERY_JOB_NUMBER", defaultValue: "1", description: "Number of latest jobs to query.")
string(name: "SLACK_CHANNEL_ID", defaultValue: "C0A7D0LCA1F", description: "Slack channel IDs to send messages to.")
string(name: "SLACK_BOT_TOKEN", defaultValue: "", description: "Slack bot token for authentication.")
}
stages {
stage("Run Perf Sanity Script") {
steps {
container("trt-llm") {
script {
sh "pwd && ls -alh"
sh "env | sort"
trtllm_utils.checkoutSource(LLM_REPO, params.BRANCH, LLM_ROOT, false, false)
sh "pip install slack_sdk"
sh """
cd ${LLM_ROOT}/jenkins/scripts/perf && ls -alh && python3 perf_sanity_triage.py \
--project_name "${params.OPEN_SEARCH_PROJECT_NAME}" \
--operation "${params.OPERATION}" \
--channel_id "${params.SLACK_CHANNEL_ID}" \
--bot_token "${params.SLACK_BOT_TOKEN}" \
--query_job_number "${params.QUERY_JOB_NUMBER}"
"""
}
}
}
} // stage Run Perf Sanity Script
} // stages
} // pipeline
View File
@@ -0,0 +1,251 @@
#!/usr/bin/env python3
import argparse
import json
import sys
import time
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
sys.path.insert(0, sys.path[0] + "/..")
from open_search_db import OpenSearchDB
QUERY_LOOKBACK_DAYS = 90
MAX_QUERY_SIZE = 3000
MAX_TEST_CASES_PER_MSG = 5
POST_SLACK_MSG_RETRY_TIMES = 5
def query_regression_data(project_name):
"""Query regression data from OpenSearch database."""
last_days = QUERY_LOOKBACK_DAYS
must_clauses = [
{"term": {"b_is_valid": True}},
{"term": {"b_is_post_merge": True}},
{"term": {"b_is_regression": True}},
{"term": {"b_is_baseline": False}},
{
"range": {
"ts_created": {
"gte": int(time.time() - 24 * 3600 * last_days)
// (24 * 3600)
* 24
* 3600
* 1000,
}
}
},
]
json_data = {
"query": {
"bool": {"must": must_clauses},
},
"size": MAX_QUERY_SIZE,
}
json_data = json.dumps(json_data)
data_list = []
try:
res = OpenSearchDB.queryFromOpenSearchDB(json_data, project_name)
if res is None:
print(f"Failed to query from {project_name}, returned no response")
return None
payload = res.json().get("hits", {}).get("hits", [])
if len(payload) == 0:
print(f"No regression data found in {project_name}, returned empty list")
return []
for hit in payload:
data_dict = hit.get("_source", {})
data_dict["_id"] = hit.get("_id", "")
if data_dict["_id"] == "":
print(f"Failed to query from {project_name}, returned data with no _id")
return None
data_list.append(data_dict)
print(f"Successfully queried from {project_name}, queried {len(data_list)} entries")
return data_list
except Exception as e:
print(f"Failed to query from {project_name}, returned error: {e}")
return None
def get_regression_data_by_job_id(data_list, query_job_number):
"""Returns a dict with job_id as key and list of regression data as value.
Only returns the latest query_job_number jobs.
"""
if data_list is None or len(data_list) == 0:
return {}
# Group data by job_id
job_data_dict = {}
for data in data_list:
job_id = data.get("s_job_id", "")
if job_id == "":
continue
if job_id not in job_data_dict:
job_data_dict[job_id] = []
job_data_dict[job_id].append(data)
# Sort job_ids by the latest ts_created in each group (descending)
def get_latest_timestamp(job_id):
timestamps = [d.get("ts_created", 0) for d in job_data_dict[job_id]]
return max(timestamps) if timestamps else 0
sorted_job_ids = sorted(job_data_dict.keys(), key=get_latest_timestamp, reverse=True)
# Only keep the latest query_job_number jobs
latest_job_ids = sorted_job_ids[:query_job_number]
result = {}
for job_id in latest_job_ids:
result[job_id] = job_data_dict[job_id]
return result
def process_regression_message(regression_dict):
"""Process regression data into message chunks.
Returns a list of messages, each containing at most MAX_TEST_CASES_PER_MSG test cases.
"""
if not regression_dict:
return []
# Flatten all test cases into a list with (job_id, idx, data) tuples
all_test_cases = []
for job_id, data_list in regression_dict.items():
sorted_data_list = sorted(data_list, key=lambda x: x.get("s_test_case_name", ""))
for idx, data in enumerate(sorted_data_list, start=1):
all_test_cases.append((job_id, idx, data))
# Split into chunks of MAX_TEST_CASES_PER_MSG
chunks = []
for i in range(0, len(all_test_cases), MAX_TEST_CASES_PER_MSG):
chunks.append(all_test_cases[i : i + MAX_TEST_CASES_PER_MSG])
# Build messages for each chunk
messages = []
for chunk in chunks:
msg_parts = []
current_job_id = None
for job_id, idx, data in chunk:
# Add job header when switching to a new job_id
if job_id != current_job_id:
if msg_parts:
msg_parts.append("\n")
job_header = f"*LLM/main/L0_PostMerge/{job_id}:*\n"
msg_parts.append(job_header)
current_job_id = job_id
test_case_name = data.get("s_test_case_name", "N/A")
regression_info = data.get("s_regression_info", "N/A")
msg_parts.append(f"*REGRESSION TEST CASE {idx}: {test_case_name}*\n")
for part in regression_info.split(","):
part = part.strip()
if part and "baseline_id" not in part:
msg_parts.append(f" {part}\n")
msg = "".join(msg_parts).strip()
messages.append(msg)
return messages
def send_regression_message(messages, channel_id, bot_token):
"""Send regression messages to Slack channel(s).
channel_id can be a single ID or multiple IDs separated by commas.
"""
if not messages:
print("No regression data to send")
return
if channel_id and bot_token:
channel_ids = [cid.strip() for cid in channel_id.split(",") if cid.strip()]
for cid in channel_ids:
for msg in messages:
send_message(msg, cid, bot_token)
else:
print("Slack channel_id or bot_token not provided, printing message:")
for i, msg in enumerate(messages, start=1):
print(f"--- Message {i} ---")
print(msg)
def send_message(msg, channel_id, bot_token):
"""Send message to Slack channel using slack_sdk."""
client = WebClient(token=bot_token)
attachments = [
{
"title": "Perf Sanity Regression Report",
"color": "#ff0000",
"text": msg,
}
]
for attempt in range(1, POST_SLACK_MSG_RETRY_TIMES + 1):
try:
result = client.chat_postMessage(
channel=channel_id,
attachments=attachments,
)
assert result["ok"] is True, json.dumps(result.data)
print(f"Message sent successfully to channel {channel_id}")
return
except SlackApiError as e:
print(
f"Attempt {attempt}/{POST_SLACK_MSG_RETRY_TIMES}: Error sending message to Slack: {e}"
)
except Exception as e:
print(f"Attempt {attempt}/{POST_SLACK_MSG_RETRY_TIMES}: Unexpected error: {e}")
if attempt < POST_SLACK_MSG_RETRY_TIMES:
time.sleep(1)
print(
f"Failed to send message to channel {channel_id} after {POST_SLACK_MSG_RETRY_TIMES} attempts"
)
def main():
parser = argparse.ArgumentParser(description="Perf Sanity Triage Script")
parser.add_argument("--project_name", type=str, required=True, help="OpenSearch project name")
parser.add_argument("--operation", type=str, required=True, help="Operation to perform")
parser.add_argument(
"--channel_id",
type=str,
default="",
help="Slack channel ID(s), comma-separated for multiple channels",
)
parser.add_argument("--bot_token", type=str, default="", help="Slack bot token")
parser.add_argument(
"--query_job_number", type=int, default=1, help="Number of latest jobs to query"
)
args = parser.parse_args()
print(f"Project Name: {args.project_name}")
print(f"Operation: {args.operation}")
print(f"Channel ID: {args.channel_id}")
print(f"Bot Token: {'***' if args.bot_token else 'Not provided'}")
print(f"Query Job Number: {args.query_job_number}")
if args.operation == "SLACK BOT SENDS MESSAGE":
data_list = query_regression_data(args.project_name)
if data_list is None:
print("Failed to query regression data")
return
regression_dict = get_regression_data_by_job_id(data_list, args.query_job_number)
messages = process_regression_message(regression_dict)
send_regression_message(messages, args.channel_id, args.bot_token)
else:
print(f"Unknown operation: {args.operation}")
if __name__ == "__main__":
main()
View File
@@ -37,3 +37,4 @@ opentelemetry-exporter-otlp>=1.26.0
opentelemetry-semantic-conventions-ai>=0.4.1
fuzzywuzzy==0.18.0
aiperf==0.3.0
nanobind>=2.9.0
View File
@@ -150,53 +150,58 @@ testing = ["filelock"]
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
View File
@@ -1119,53 +1119,58 @@ test = ["pytest"]
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
View File
@@ -263,13 +263,13 @@ files = [
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]

View File
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -2203,13 +2203,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
@@ -3705,13 +3705,13 @@ files = [
[[package]]
name = "werkzeug"
version = "3.1.4"
version = "3.1.5"
description = "The comprehensive WSGI web application library."
optional = false
python-versions = ">=3.9"
files = [
{file = "werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905"},
{file = "werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e"},
{file = "werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc"},
{file = "werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -552,13 +552,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -1970,13 +1970,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -499,13 +499,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -781,13 +781,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -567,13 +567,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -881,13 +881,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1955,21 +1955,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2396,80 +2396,80 @@ test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis
[[package]]
name = "scipy"
version = "1.16.3"
version = "1.17.0"
description = "Fundamental algorithms for scientific computing in Python"
optional = false
python-versions = ">=3.11"
files = [
{file = "scipy-1.16.3-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:40be6cf99e68b6c4321e9f8782e7d5ff8265af28ef2cd56e9c9b2638fa08ad97"},
{file = "scipy-1.16.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8be1ca9170fcb6223cc7c27f4305d680ded114a1567c0bd2bfcbf947d1b17511"},
{file = "scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bea0a62734d20d67608660f69dcda23e7f90fb4ca20974ab80b6ed40df87a005"},
{file = "scipy-1.16.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2a207a6ce9c24f1951241f4693ede2d393f59c07abc159b2cb2be980820e01fb"},
{file = "scipy-1.16.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:532fb5ad6a87e9e9cd9c959b106b73145a03f04c7d57ea3e6f6bb60b86ab0876"},
{file = "scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0151a0749efeaaab78711c78422d413c583b8cdd2011a3c1d6c794938ee9fdb2"},
{file = "scipy-1.16.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7180967113560cca57418a7bc719e30366b47959dd845a93206fbed693c867e"},
{file = "scipy-1.16.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:deb3841c925eeddb6afc1e4e4a45e418d19ec7b87c5df177695224078e8ec733"},
{file = "scipy-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:53c3844d527213631e886621df5695d35e4f6a75f620dca412bcd292f6b87d78"},
{file = "scipy-1.16.3-cp311-cp311-win_arm64.whl", hash = "sha256:9452781bd879b14b6f055b26643703551320aa8d79ae064a71df55c00286a184"},
{file = "scipy-1.16.3-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81fc5827606858cf71446a5e98715ba0e11f0dbc83d71c7409d05486592a45d6"},
{file = "scipy-1.16.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c97176013d404c7346bf57874eaac5187d969293bf40497140b0a2b2b7482e07"},
{file = "scipy-1.16.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2b71d93c8a9936046866acebc915e2af2e292b883ed6e2cbe5c34beb094b82d9"},
{file = "scipy-1.16.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3d4a07a8e785d80289dfe66b7c27d8634a773020742ec7187b85ccc4b0e7b686"},
{file = "scipy-1.16.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0553371015692a898e1aa858fed67a3576c34edefa6b7ebdb4e9dde49ce5c203"},
{file = "scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:72d1717fd3b5e6ec747327ce9bda32d5463f472c9dce9f54499e81fbd50245a1"},
{file = "scipy-1.16.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fb2472e72e24d1530debe6ae078db70fb1605350c88a3d14bc401d6306dbffe"},
{file = "scipy-1.16.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5192722cffe15f9329a3948c4b1db789fbb1f05c97899187dcf009b283aea70"},
{file = "scipy-1.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:56edc65510d1331dae01ef9b658d428e33ed48b4f77b1d51caf479a0253f96dc"},
{file = "scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2"},
{file = "scipy-1.16.3-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:d2ec56337675e61b312179a1ad124f5f570c00f920cc75e1000025451b88241c"},
{file = "scipy-1.16.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:16b8bc35a4cc24db80a0ec836a9286d0e31b2503cb2fd7ff7fb0e0374a97081d"},
{file = "scipy-1.16.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:5803c5fadd29de0cf27fa08ccbfe7a9e5d741bf63e4ab1085437266f12460ff9"},
{file = "scipy-1.16.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:b81c27fc41954319a943d43b20e07c40bdcd3ff7cf013f4fb86286faefe546c4"},
{file = "scipy-1.16.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0c3b4dd3d9b08dbce0f3440032c52e9e2ab9f96ade2d3943313dfe51a7056959"},
{file = "scipy-1.16.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7dc1360c06535ea6116a2220f760ae572db9f661aba2d88074fe30ec2aa1ff88"},
{file = "scipy-1.16.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:663b8d66a8748051c3ee9c96465fb417509315b99c71550fda2591d7dd634234"},
{file = "scipy-1.16.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eab43fae33a0c39006a88096cd7b4f4ef545ea0447d250d5ac18202d40b6611d"},
{file = "scipy-1.16.3-cp313-cp313-win_amd64.whl", hash = "sha256:062246acacbe9f8210de8e751b16fc37458213f124bef161a5a02c7a39284304"},
{file = "scipy-1.16.3-cp313-cp313-win_arm64.whl", hash = "sha256:50a3dbf286dbc7d84f176f9a1574c705f277cb6565069f88f60db9eafdbe3ee2"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:fb4b29f4cf8cc5a8d628bc8d8e26d12d7278cd1f219f22698a378c3d67db5e4b"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:8d09d72dc92742988b0e7750bddb8060b0c7079606c0d24a8cc8e9c9c11f9079"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:03192a35e661470197556de24e7cb1330d84b35b94ead65c46ad6f16f6b28f2a"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:57d01cb6f85e34f0946b33caa66e892aae072b64b034183f3d87c4025802a119"},
{file = "scipy-1.16.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:96491a6a54e995f00a28a3c3badfff58fd093bf26cd5fb34a2188c8c756a3a2c"},
{file = "scipy-1.16.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cd13e354df9938598af2be05822c323e97132d5e6306b83a3b4ee6724c6e522e"},
{file = "scipy-1.16.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63d3cdacb8a824a295191a723ee5e4ea7768ca5ca5f2838532d9f2e2b3ce2135"},
{file = "scipy-1.16.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e7efa2681ea410b10dde31a52b18b0154d66f2485328830e45fdf183af5aefc6"},
{file = "scipy-1.16.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2d1ae2cf0c350e7705168ff2429962a89ad90c2d49d1dd300686d8b2a5af22fc"},
{file = "scipy-1.16.3-cp313-cp313t-win_arm64.whl", hash = "sha256:0c623a54f7b79dd88ef56da19bc2873afec9673a48f3b85b18e4d402bdd29a5a"},
{file = "scipy-1.16.3-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:875555ce62743e1d54f06cdf22c1e0bc47b91130ac40fe5d783b6dfa114beeb6"},
{file = "scipy-1.16.3-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:bb61878c18a470021fb515a843dc7a76961a8daceaaaa8bad1332f1bf4b54657"},
{file = "scipy-1.16.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f2622206f5559784fa5c4b53a950c3c7c1cf3e84ca1b9c4b6c03f062f289ca26"},
{file = "scipy-1.16.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7f68154688c515cdb541a31ef8eb66d8cd1050605be9dcd74199cbd22ac739bc"},
{file = "scipy-1.16.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3c820ddb80029fe9f43d61b81d8b488d3ef8ca010d15122b152db77dc94c22"},
{file = "scipy-1.16.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d3837938ae715fc0fe3c39c0202de3a8853aff22ca66781ddc2ade7554b7e2cc"},
{file = "scipy-1.16.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aadd23f98f9cb069b3bd64ddc900c4d277778242e961751f77a8cb5c4b946fb0"},
{file = "scipy-1.16.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b7c5f1bda1354d6a19bc6af73a649f8285ca63ac6b52e64e658a5a11d4d69800"},
{file = "scipy-1.16.3-cp314-cp314-win_amd64.whl", hash = "sha256:e5d42a9472e7579e473879a1990327830493a7047506d58d73fc429b84c1d49d"},
{file = "scipy-1.16.3-cp314-cp314-win_arm64.whl", hash = "sha256:6020470b9d00245926f2d5bb93b119ca0340f0d564eb6fbaad843eaebf9d690f"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:e1d27cbcb4602680a49d787d90664fa4974063ac9d4134813332a8c53dbe667c"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:9b9c9c07b6d56a35777a1b4cc8966118fb16cfd8daf6743867d17d36cfad2d40"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:3a4c460301fb2cffb7f88528f30b3127742cff583603aa7dc964a52c463b385d"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:f667a4542cc8917af1db06366d3f78a5c8e83badd56409f94d1eac8d8d9133fa"},
{file = "scipy-1.16.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f379b54b77a597aa7ee5e697df0d66903e41b9c85a6dd7946159e356319158e8"},
{file = "scipy-1.16.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4aff59800a3b7f786b70bfd6ab551001cb553244988d7d6b8299cb1ea653b353"},
{file = "scipy-1.16.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:da7763f55885045036fabcebd80144b757d3db06ab0861415d1c3b7c69042146"},
{file = "scipy-1.16.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ffa6eea95283b2b8079b821dc11f50a17d0571c92b43e2b5b12764dc5f9b285d"},
{file = "scipy-1.16.3-cp314-cp314t-win_amd64.whl", hash = "sha256:d9f48cafc7ce94cf9b15c6bffdc443a81a27bf7075cf2dcd5c8b40f85d10c4e7"},
{file = "scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562"},
{file = "scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb"},
{file = "scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd"},
{file = "scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558"},
{file = "scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7"},
{file = "scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6"},
{file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042"},
{file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4"},
{file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0"},
{file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449"},
{file = "scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea"},
{file = "scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = "sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379"},
{file = "scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57"},
{file = "scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e"},
{file = "scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8"},
{file = "scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306"},
{file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742"},
{file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b"},
{file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d"},
{file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e"},
{file = "scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8"},
{file = "scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b"},
{file = "scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6"},
{file = "scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269"},
{file = "scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72"},
{file = "scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61"},
{file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6"},
{file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752"},
{file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d"},
{file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea"},
{file = "scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812"},
{file = "scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07"},
{file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00"},
{file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45"},
{file = "scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209"},
{file = "scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04"},
{file = "scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0"},
{file = "scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67"},
{file = "scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a"},
{file = "scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2"},
{file = "scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467"},
{file = "scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e"},
{file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67"},
{file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73"},
{file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b"},
{file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b"},
{file = "scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061"},
{file = "scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d"},
{file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba"},
{file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db"},
{file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf"},
{file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f"},
{file = "scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088"},
{file = "scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff"},
{file = "scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e"},
]
[package.dependencies]
numpy = ">=1.25.2,<2.6"
numpy = ">=1.26.4,<2.7"
[package.extras]
dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"]
doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"]
dev = ["click (<8.3.0)", "cython-lint (>=0.12.2)", "mypy (==1.10.0)", "pycodestyle", "ruff (>=0.12.0)", "spin", "types-psutil", "typing_extensions"]
doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)", "tabulate"]
test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest (>=8.0.0)", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]


@ -21,61 +21,61 @@ trio = ["trio (>=0.31.0)", "trio (>=0.32.0)"]
[[package]]
name = "av"
version = "16.0.1"
version = "16.1.0"
description = "Pythonic bindings for FFmpeg's libraries."
optional = false
python-versions = ">=3.10"
files = [
{file = "av-16.0.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:8b141aaa29a3afc96a1d467d106790782c1914628b57309eaadb8c10c299c9c0"},
{file = "av-16.0.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:4b8a08a59a5be0082af063d3f4b216e3950340121c6ea95b505a3f5f5cc8f21d"},
{file = "av-16.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:792e7fc3c08eae005ff36486983966476e553cbb55aaeb0ec99adc4909377320"},
{file = "av-16.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:4e8ef5df76d8d0ee56139789f80bb90ad1a82a7e6df6e080e2e95c06fa22aea7"},
{file = "av-16.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4f7a6985784a7464f078e419c71f5528c3e550ee5d605e7149b4a37a111eb136"},
{file = "av-16.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3f45c8d7b803b6faa2a25a26de5964a0a897de68298d9c9672c7af9d65d8b48a"},
{file = "av-16.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:58e6faf1d9328d8cc6be14c5aadacb7d2965ed6d6ae1af32696993096543ff00"},
{file = "av-16.0.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e310d1fb42879df9bad2152a8db6d2ff8bf332c8c36349a09d62cc122f5070fb"},
{file = "av-16.0.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:2f4b357e5615457a84e6b6290916b22864b76b43d5079e1a73bc27581a5b9bac"},
{file = "av-16.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:286665c77034c3a98080169b8b5586d5568a15da81fbcdaf8099252f2d232d7c"},
{file = "av-16.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f88de8e5b8ea29e41af4d8d61df108323d050ccfbc90f15b13ec1f99ce0e841e"},
{file = "av-16.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0cdb71ebe4d1b241cf700f8f0c44a7d2a6602b921e16547dd68c0842113736e1"},
{file = "av-16.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28c27a65d40e8cf82b6db2543f8feeb8b56d36c1938f50773494cd3b073c7223"},
{file = "av-16.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ffea39ac7574f234f5168f9b9602e8d4ecdd81853238ec4d661001f03a6d3f64"},
{file = "av-16.0.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e41a8fef85dfb2c717349f9ff74f92f9560122a9f1a94b1c6c9a8a9c9462ba71"},
{file = "av-16.0.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:6352a64b25c9f985d4f279c2902db9a92424e6f2c972161e67119616f0796cb9"},
{file = "av-16.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5201f7b4b5ed2128118cb90c2a6d64feedb0586ca7c783176896c78ffb4bbd5c"},
{file = "av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:daecc2072b82b6a942acbdaa9a2e00c05234c61fef976b22713983c020b07992"},
{file = "av-16.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6573da96e8bebc3536860a7def108d7dbe1875c86517072431ced702447e6aea"},
{file = "av-16.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4bc064e48a8de6c087b97dd27cf4ef8c13073f0793108fbce3ecd721201b2502"},
{file = "av-16.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0c669b6b6668c8ae74451c15ec6d6d8a36e4c3803dc5d9910f607a174dd18f17"},
{file = "av-16.0.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:4c61c6c120f5c5d95c711caf54e2c4a9fb2f1e613ac0a9c273d895f6b2602e44"},
{file = "av-16.0.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ecc2e41320c69095f44aff93470a0d32c30892b2dbad0a08040441c81efa379"},
{file = "av-16.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:036f0554d6faef3f4a94acaeb0cedd388e3ab96eb0eb5a14ec27c17369c466c9"},
{file = "av-16.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:876415470a62e4a3550cc38db2fc0094c25e64eea34d7293b7454125d5958190"},
{file = "av-16.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:56902a06bd0828d13f13352874c370670882048267191ff5829534b611ba3956"},
{file = "av-16.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe988c2bf0fc2d952858f791f18377ea4ae4e19ba3504793799cd6c2a2562edf"},
{file = "av-16.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:708a66c248848029bf518f0482b81c5803846f1b597ef8013b19c014470b620f"},
{file = "av-16.0.1-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:79a77ee452537030c21a0b41139bedaf16629636bf764b634e93b99c9d5f4558"},
{file = "av-16.0.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:080823a6ff712f81e7089ae9756fb1512ca1742a138556a852ce50f58e457213"},
{file = "av-16.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:04e00124afa8b46a850ed48951ddda61de874407fb8307d6a875bba659d5727e"},
{file = "av-16.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:bc098c1c6dc4e7080629a7e9560e67bd4b5654951e17e5ddfd2b1515cfcd37db"},
{file = "av-16.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ffd3559a72c46a76aa622630751a821499ba5a780b0047ecc75105d43a6b61"},
{file = "av-16.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7a3f1a36b550adadd7513f4f5ee956f9e06b01a88e59f3150ef5fec6879d6f79"},
{file = "av-16.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c6de794abe52b8c0be55d8bb09ade05905efa74b1a5ab4860b4b9c2bfb6578bf"},
{file = "av-16.0.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:4b55ba69a943ae592ad7900da67129422954789de9dc384685d6b529925f542e"},
{file = "av-16.0.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d4a0c47b6c9bbadad8909b82847f5fe64a608ad392f0b01704e427349bcd9a47"},
{file = "av-16.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:8bba52f3035708456f6b1994d10b0371b45cfd8f917b5e84ff81aef4ec2f08bf"},
{file = "av-16.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:08e34c7e7b5e55e29931180bbe21095e1874ac120992bf6b8615d39574487617"},
{file = "av-16.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0d6250ab9db80c641b299987027c987f14935ea837ea4c02c5f5182f6b69d9e5"},
{file = "av-16.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7b621f28d8bcbb07cdcd7b18943ddc040739ad304545715ae733873b6e1b739d"},
{file = "av-16.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:92101f49082392580c9dba4ba2fe5b931b3bb0fb75a1a848bfb9a11ded68be91"},
{file = "av-16.0.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:07c464bf2bc362a154eccc82e235ef64fd3aaf8d76fc8ed63d0ae520943c6d3f"},
{file = "av-16.0.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:750da0673864b669c95882c7b25768cd93ece0e47010d74ebcc29dbb14d611f8"},
{file = "av-16.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0b7c0d060863b2e341d07cd26851cb9057b7979814148b028fb7ee5d5eb8772d"},
{file = "av-16.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e67c2eca6023ca7d76b0709c5f392b23a5defba499f4c262411f8155b1482cbd"},
{file = "av-16.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3243d54d84986e8fbdc1946db634b0c41fe69b6de35a99fa8b763e18503d040"},
{file = "av-16.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bcf73efab5379601e6510abd7afe5f397d0f6defe69b1610c2f37a4a17996b"},
{file = "av-16.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6368d4ff153d75469d2a3217bc403630dc870a72fe0a014d9135de550d731a86"},
{file = "av-16.0.1.tar.gz", hash = "sha256:dd2ce779fa0b5f5889a6d9e00fbbbc39f58e247e52d31044272648fe16ff1dbf"},
{file = "av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295"},
{file = "av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd"},
{file = "av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed"},
{file = "av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619"},
{file = "av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15"},
{file = "av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3"},
{file = "av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff"},
{file = "av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f"},
{file = "av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b"},
{file = "av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879"},
{file = "av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e"},
{file = "av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83"},
{file = "av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63"},
{file = "av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62"},
{file = "av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6"},
{file = "av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35"},
{file = "av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86"},
{file = "av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2"},
{file = "av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a"},
{file = "av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829"},
{file = "av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd"},
{file = "av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587"},
{file = "av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e"},
{file = "av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7"},
{file = "av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a"},
{file = "av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99"},
{file = "av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c"},
{file = "av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06"},
{file = "av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2"},
{file = "av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0"},
{file = "av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560"},
{file = "av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97"},
{file = "av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5"},
{file = "av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c"},
{file = "av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c"},
{file = "av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4"},
{file = "av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9"},
{file = "av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7"},
{file = "av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142"},
{file = "av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f"},
{file = "av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043"},
{file = "av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88"},
{file = "av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205"},
{file = "av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5"},
{file = "av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2"},
{file = "av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad"},
{file = "av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5"},
{file = "av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649"},
{file = "av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700"},
{file = "av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd"},
]
[[package]]
@ -150,24 +150,24 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -178,7 +178,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -195,7 +195,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]
@ -290,13 +290,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1250,4 +1250,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "c192429330d74a80d01bb76235d36a9d5ae31fff1b4ad57a0a22f7439a6843e0"
content-hash = "852386737336a4091f1dffd31b545734085aec2c93c195e6f6b182047584c60d"

View File

@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
decord = "^0.6.0"
timm = "^1.0.24"
av = "^16.0.1"
av = "^16.1.0"
[build-system]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -218,24 +218,24 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -246,7 +246,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -263,7 +263,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -166,15 +166,15 @@ test-tox-coverage = ["coverage (>=5.5)"]
[[package]]
name = "bitsandbytes"
version = "0.49.0"
version = "0.49.1"
description = "k-bit optimizers and matrix multiplication routines."
optional = false
python-versions = ">=3.10"
files = [
{file = "bitsandbytes-0.49.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:17d5b57e6d51b78bcfc07da0e93db061181b25bffabfafe101dd9b75c2710872"},
{file = "bitsandbytes-0.49.0-py3-none-manylinux_2_24_aarch64.whl", hash = "sha256:7e69951b4d207a676986fce967544d9599f23518d0f09d478295996aeff377c2"},
{file = "bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:0c46cdef50b3174463b6bdf13715c9f1f00b360be3626e3c5d2f8d226af2cf3f"},
{file = "bitsandbytes-0.49.0-py3-none-win_amd64.whl", hash = "sha256:57a327c6d65f7eda32eb8d416ef8e44d2415c2e7b4fdb735896abd04171ae696"},
{file = "bitsandbytes-0.49.1-py3-none-macosx_14_0_arm64.whl", hash = "sha256:9de01d4384b6c71ef9ab052b98457dc0e4fff8fe06ab14833b5b712700deb005"},
{file = "bitsandbytes-0.49.1-py3-none-manylinux_2_24_aarch64.whl", hash = "sha256:acd4730a0db3762d286707f4a3bc1d013d21dd5f0e441900da57ec4198578d4e"},
{file = "bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:e7940bf32457dc2e553685285b2a86e82f5ec10b2ae39776c408714f9ae6983c"},
{file = "bitsandbytes-0.49.1-py3-none-win_amd64.whl", hash = "sha256:6ead0763f4beb936f9a09acb49ec094a259180906fc0605d9ca0617249c3c798"},
]
[package.dependencies]
@ -590,24 +590,24 @@ pytest = ["pytest (>=7)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -618,7 +618,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -635,7 +635,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]
@ -1327,21 +1327,21 @@ ssh = ["paramiko"]
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2350,4 +2350,4 @@ dev = ["pytest", "setuptools"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "1dd6c5ec9bea98c49f5a6401d4af6e4406a1940a1c91823b730838f9bdf18ac9"
content-hash = "6f8d1977f60d92124fb949b55a658c7e8e176458dc9e2a29ee1d7c52dec7ab26"

View File

@ -13,7 +13,7 @@ rotary-embedding-torch = "0.5.3"
fabric = "^3.2.2"
contexttimer = "^0.3.3"
ray = "^2.53.0"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
bitsandbytes = ">=0.39.0"
rpyc = "6.0.0"
galore-torch = "^1.0"

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -522,13 +522,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -872,13 +872,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1796,21 +1796,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"

View File

@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

@ -176,24 +176,24 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -204,7 +204,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -221,7 +221,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -168,24 +168,24 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -196,7 +196,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -213,7 +213,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -500,13 +500,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -782,13 +782,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -688,13 +688,13 @@ psutil = ["psutil (>=5.8.0)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1917,13 +1917,13 @@ files = [
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]
@ -2927,30 +2927,30 @@ six = ">=1.14.0"
[[package]]
name = "ruff"
version = "0.14.10"
version = "0.14.11"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
files = [
{file = "ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49"},
{file = "ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f"},
{file = "ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f"},
{file = "ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830"},
{file = "ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6"},
{file = "ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154"},
{file = "ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6"},
{file = "ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4"},
{file = "ruff-0.14.11-py3-none-linux_armv6l.whl", hash = "sha256:f6ff2d95cbd335841a7217bdfd9c1d2e44eac2c584197ab1385579d55ff8830e"},
{file = "ruff-0.14.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f6eb5c1c8033680f4172ea9c8d3706c156223010b8b97b05e82c59bdc774ee6"},
{file = "ruff-0.14.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2fc34cc896f90080fca01259f96c566f74069a04b25b6205d55379d12a6855e"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53386375001773ae812b43205d6064dae49ff0968774e6befe16a994fc233caa"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a697737dce1ca97a0a55b5ff0434ee7205943d4874d638fe3ae66166ff46edbe"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6845ca1da8ab81ab1dce755a32ad13f1db72e7fba27c486d5d90d65e04d17b8f"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e36ce2fd31b54065ec6f76cb08d60159e1b32bdf08507862e32f47e6dde8bcbf"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590bcc0e2097ecf74e62a5c10a6b71f008ad82eb97b0a0079e85defe19fe74d9"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53fe71125fc158210d57fe4da26e622c9c294022988d08d9347ec1cf782adafe"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a35c9da08562f1598ded8470fcfef2afb5cf881996e6c0a502ceb61f4bc9c8a3"},
{file = "ruff-0.14.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0f3727189a52179393ecf92ec7057c2210203e6af2676f08d92140d3e1ee72c1"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:eb09f849bd37147a789b85995ff734a6c4a095bed5fd1608c4f56afc3634cde2"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c61782543c1231bf71041461c1f28c64b961d457d0f238ac388e2ab173d7ecb7"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82ff352ea68fb6766140381748e1f67f83c39860b6446966cff48a315c3e2491"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:728e56879df4ca5b62a9dde2dd0eb0edda2a55160c0ea28c4025f18c03f86984"},
{file = "ruff-0.14.11-py3-none-win32.whl", hash = "sha256:337c5dd11f16ee52ae217757d9b82a26400be7efac883e9e852646f1557ed841"},
{file = "ruff-0.14.11-py3-none-win_amd64.whl", hash = "sha256:f981cea63d08456b2c070e64b79cb62f951aa1305282974d4d5216e6e0178ae6"},
{file = "ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0"},
{file = "ruff-0.14.11.tar.gz", hash = "sha256:f6dc463bfa5c07a59b1ff2c3b9767373e541346ea105503b4c0369c520a66958"},
]
[[package]]
@ -3863,4 +3863,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "15be197852e2bbd2af31018599c8327bb9e7c547e9cb06b805f17c94d126948c"
content-hash = "851a9b014df9acf65531d771edf4c10d381d0705c4b7bfc59102ea3c3d9ae963"

View File

@ -19,7 +19,7 @@ gradio = "4.44.1"
mdtex2html = "^1.3.2"
sse-starlette = "^3.1.2"
aiohttp-sse-client = "^0.2.1"
openai = "^2.14.0"
openai = "^2.15.0"
[build-system]

View File

@ -464,13 +464,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

View File

@ -591,13 +591,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -945,13 +945,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -510,13 +510,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1681,21 +1681,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]

View File

@ -516,13 +516,13 @@ profile = ["gprof2dot (>=2022.7.29)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -454,13 +454,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -736,13 +736,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -400,13 +400,13 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -550,13 +550,13 @@ files = [
[[package]]
name = "google-api-core"
version = "2.28.1"
version = "2.29.0"
description = "Google API client core library"
optional = false
python-versions = ">=3.7"
files = [
{file = "google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c"},
{file = "google_api_core-2.28.1.tar.gz", hash = "sha256:2b405df02d68e68ce0fbc138559e6036559e685159d148ae5861013dc201baf8"},
{file = "google_api_core-2.29.0-py3-none-any.whl", hash = "sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9"},
{file = "google_api_core-2.29.0.tar.gz", hash = "sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7"},
]
[package.dependencies]
@ -1286,21 +1286,21 @@ testing = ["google-api-core (>=1.31.5)"]
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -1908,13 +1908,13 @@ zstd = ["backports-zstd (>=1.0.0)"]
[[package]]
name = "virtualenv"
version = "20.36.0"
version = "20.36.1"
description = "Virtual Python Environment builder"
optional = false
python-versions = ">=3.8"
files = [
{file = "virtualenv-20.36.0-py3-none-any.whl", hash = "sha256:e7ded577f3af534fd0886d4ca03277f5542053bedb98a70a989d3c22cfa5c9ac"},
{file = "virtualenv-20.36.0.tar.gz", hash = "sha256:a3601f540b515a7983508113f14e78993841adc3d83710fa70f0ac50f43b23ed"},
{file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"},
{file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"},
]
[package.dependencies]

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -524,13 +524,13 @@ dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -674,13 +674,13 @@ files = [
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -691,7 +691,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -708,7 +708,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

View File

@ -552,13 +552,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1972,13 +1972,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]

View File

@ -1,4 +1,4 @@
{
"commit_hash": "b85c447ceb1ff91c5d4df6b71de2256a5fabfe9d",
"timestamp": "2026-01-08T02:42:38Z"
"commit_hash": "c0e25e54181528c8e0818e2e9bc22fe5a889b8cc",
"timestamp": "2026-01-12T02:39:25Z"
}

View File

@ -435,19 +435,19 @@ urllib3 = ">=1.25.3,<3"
[[package]]
name = "build"
version = "1.3.0"
version = "1.4.0"
description = "A simple, correct Python build frontend"
optional = false
python-versions = ">=3.9"
files = [
{file = "build-1.3.0-py3-none-any.whl", hash = "sha256:7145f0b5061ba90a1500d60bd1b13ca0a8a4cebdd0cc16ed8adf1c0e739f43b4"},
{file = "build-1.3.0.tar.gz", hash = "sha256:698edd0ea270bde950f53aed21f3a0135672206f3911e0176261a31e0e07b397"},
{file = "build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596"},
{file = "build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936"},
]
[package.dependencies]
colorama = {version = "*", markers = "os_name == \"nt\""}
importlib-metadata = {version = ">=4.6", markers = "python_full_version < \"3.10.2\""}
packaging = ">=19.1"
packaging = ">=24.0"
pyproject_hooks = "*"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
@ -1144,13 +1144,13 @@ standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[stand
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -3462,13 +3462,13 @@ onnx = ">=1.14.0"
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]
@ -3715,13 +3715,13 @@ test = ["importlib_metadata (>=2.0)", "pytest (>=6.0)"]
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
@ -4045,21 +4045,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -5540,53 +5540,58 @@ testing = ["datasets", "numpy", "pytest", "pytest-asyncio", "requests", "ruff",
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
@ -6339,4 +6344,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "dab9694d64d1c91b512eb62bbd31da9d0cdb8c93e99941a7022f2f46aea905e3"
content-hash = "f17eedd404a2af6728d14710809ea47ad34bc6672c035073bad9e6c709131a08"

View File

@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.10,<3.13"
accelerate = ">=1.7.0"
build = "^1.3.0"
build = "^1.4.0"
colored = "^2.3.1"
cuda-python = ">=13"
diffusers = ">=0.27.0"
@ -18,7 +18,7 @@ mpi4py = "^4.1.1"
numpy = "<2"
onnx = ">=1.18.0,<1.20.0"
onnx-graphsurgeon = ">=0.5.2"
openai = "^2.14.0"
openai = "^2.15.0"
polygraphy = "^0.49.26"
psutil = "^7.2.1"
nvidia-ml-py = ">=13"
@ -53,7 +53,7 @@ starlette = ">=0.49.1"
uvicorn = "^0.40.0"
setuptools = "<80"
ordered-set = "^4.1.0"
peft = "^0.18.0"
peft = "^0.18.1"
patchelf = "^0.17.2.4"
einops = "^0.8.1"
flashinfer-python = ">=0.3.0,<0.4.0"

View File

@ -146,13 +146,13 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -174,13 +174,13 @@ test = ["hypothesis (<6.136.0)", "levenshtein (<=0.27.1)", "pip", "pylint (<3.3.
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -191,7 +191,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -208,7 +208,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

View File

@ -128,7 +128,6 @@ else:
    ]
    package_data += [
        'bindings.pyi',
        'bindings/*.pyi',
        'tools/plugin_gen/templates/*',
        'bench/build/benchmark_config.yml',

View File

@ -17,7 +17,7 @@ from tensorrt_llm._torch.modules.multi_stream_utils import \
    maybe_execute_in_parallel
from tensorrt_llm._torch.modules.rotary_embedding import RotaryEmbedding
from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager
from tensorrt_llm._torch.utils import maybe_compile
from tensorrt_llm._torch.utils import maybe_compile, maybe_compiled_cat
from tensorrt_llm._utils import get_size_in_bytes, get_sm_version
from tensorrt_llm.bindings import DataType
from tensorrt_llm.bindings.executor import KvCacheConfig
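The import change above brings in maybe_compiled_cat, which a hunk further down substitutes for a plain torch.cat in _prep_q_or_k. A minimal sketch of how such a helper could be structured, assuming it merely routes the concat through torch.compile when a compiler backend is available; the actual tensorrt_llm._torch.utils implementation is not shown in this diff and may differ:

import torch

def _eager_cat(tensors, dim):
    # Plain eager concatenation, used as the fallback path.
    return torch.cat(tensors, dim=dim)

try:
    # Wrap once at import time; torch.compile specializes lazily on first call.
    _cat_impl = torch.compile(_eager_cat)
except Exception:
    # Fall back when no compiler backend is available on this platform.
    _cat_impl = _eager_cat

def maybe_compiled_cat_sketch(tensors, dim=-1):
    """Concatenate tensors, using the compiled path when compilation succeeded."""
    return _cat_impl(tensors, dim)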
@ -1047,12 +1047,11 @@ class Indexer(nn.Module):
        # Indexer should just process the current MLA chunk as a single chunk
        has_mla_chunked_prefill = (
            metadata.enable_context_mla_with_cached_kv
            and host_cached_tokens.sum().item() > 0
            and metadata.runtime_features.chunked_prefill)
        if has_mla_chunked_prefill:
            # The MLA has already split the sequence, here just process what's given (as a single chunk)
            # Cached token info is derived from metadata.host_ctx_cached_token_indptr in prepare_one_prefill_chunk
            # MLA chunked prefill is active - use single-chunk pattern for
            # indexer prefill chunks.
            chunk_specs = [(i, 0, host_seq_lens[i].item(),
                            host_seq_lens[:i].sum().item() if i > 0 else 0)
                           for i in range(num_contexts)]
@ -1063,7 +1062,8 @@ class Indexer(nn.Module):
                )
            ]
        else:
            # Normal mode: use indexer's own chunking logic to prevent L^2 complexity when long-sequence is used.
            # Use indexer's own chunking logic to prevent L^2 complexity of indexer MQA logits computation for long sequences.
            # This is only used when MLA chunked prefill is not enabled.
            chunk_groups = split_prefill_chunks(
                host_seq_lens,
                metadata.indexer_max_chunk_size,
@ -1541,7 +1541,7 @@ class Indexer(nn.Module):
    def _prep_q_or_k(self, qk_pe: torch.Tensor, qk_nope: torch.Tensor):
        """Concatenate, rotate, and FP8 quantize for Q or K"""
        q_or_k = torch.cat([qk_pe, qk_nope], dim=-1)
        q_or_k = maybe_compiled_cat([qk_pe, qk_nope], dim=-1)
        q_or_k = rotate_activation(q_or_k)
        q_or_k = q_or_k.view(-1, self.head_dim)
        q_or_k = fp8_utils.fp8_quantize_1x128_sf_transpose(

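Both the old and new comments in the hunks above motivate the same design: computing the indexer's MQA logits over an unchunked length-L prefill costs O(L^2), so long sequences are split into bounded chunks, and only the MLA-chunked-prefill path skips that splitting. A small, self-contained sketch of the splitting idea; the function name and the (seq_idx, chunk_start, chunk_len) layout are illustrative assumptions, not the actual split_prefill_chunks signature:

from typing import List, Tuple

def split_into_chunks(seq_lens: List[int],
                      max_chunk_size: int) -> List[Tuple[int, int, int]]:
    """Return (seq_idx, chunk_start, chunk_len) triples, each chunk at most
    max_chunk_size tokens long, so per-chunk logits cost stays bounded."""
    chunks = []
    for seq_idx, seq_len in enumerate(seq_lens):
        start = 0
        while start < seq_len:
            chunk_len = min(max_chunk_size, seq_len - start)
            chunks.append((seq_idx, start, chunk_len))
            start += chunk_len
    return chunks

if __name__ == "__main__":
    # Two prefill sequences of 5 and 3 tokens, chunked at 2 tokens each.
    print(split_into_chunks([5, 3], max_chunk_size=2))
    # [(0, 0, 2), (0, 2, 2), (0, 4, 1), (1, 0, 2), (1, 2, 1)]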
View File

@ -5,6 +5,7 @@ from .modeling_bert import BertForSequenceClassification
from .modeling_clip import CLIPVisionModel
from .modeling_deepseekv3 import DeepseekV3ForCausalLM
from .modeling_exaone4 import Exaone4ForCausalLM
from .modeling_exaone_moe import ExaoneMoeForCausalLM
from .modeling_gemma3 import Gemma3ForCausalLM
from .modeling_gemma3vl import Gemma3VLM
from .modeling_glm import Glm4MoeForCausalLM
@ -44,6 +45,7 @@ __all__ = [
"CLIPVisionModel",
"DeepseekV3ForCausalLM",
"Exaone4ForCausalLM",
"ExaoneMoeForCausalLM",
"Gemma3ForCausalLM",
"Gemma3VLM",
"HCXVisionForCausalLM",

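The new ExaoneMoeForCausalLM export above is backed by the modeling_exaone_moe file below, which imports register_auto_model from modeling_utils. Assuming that decorator follows the usual auto-model registry pattern of keying classes by the architecture name found in the HF config, a minimal sketch looks like this (the real tensorrt_llm registry may differ):

# Hypothetical registry sketch; names here are stand-ins, not the real code.
_MODEL_REGISTRY = {}

def register_auto_model(cls):
    # Key by class name, matching the "architectures" field of an HF config.
    _MODEL_REGISTRY[cls.__name__] = cls
    return cls

@register_auto_model
class ExaoneMoeForCausalLM:  # stand-in for the real model class
    pass

# Resolving an architecture string back to its implementation class:
assert _MODEL_REGISTRY["ExaoneMoeForCausalLM"] is ExaoneMoeForCausalLM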
View File

@ -0,0 +1,581 @@
import math
import os
import re
from typing import Dict, List, Optional, Tuple
import torch
from torch import nn
from tensorrt_llm._ipc_utils import can_access_peer
from tensorrt_llm._torch.modules.qk_norm_attention import QKNormRoPEAttention
from tensorrt_llm.functional import PositionEmbeddingType
from tensorrt_llm.mapping import Mapping
from tensorrt_llm.models.modeling_utils import QuantConfig
from tensorrt_llm.quantization import QuantAlgo
from ...logger import logger
from ..attention_backend import AttentionMetadata
from ..attention_backend.interface import (
PositionalEmbeddingParams,
PredefinedAttentionMask,
RopeParams,
)
from ..distributed import (
AllReduce,
AllReduceFusionOp,
AllReduceParams,
MoEAllReduce,
MoEAllReduceParams,
)
from ..model_config import ModelConfig
from ..models.modeling_deepseekv3 import Deepseekv3MoE
from ..modules.decoder_layer import DecoderLayer
from ..modules.embedding import Embedding
from ..modules.gated_mlp import GatedMLP
from ..modules.linear import TensorParallelMode
from ..modules.rms_norm import RMSNorm
from ..utils import AuxStreamType, Fp4QuantizedTensor
from .modeling_utils import (
DecoderModel,
DecoderModelForCausalLM,
EagerFusionConfig,
register_auto_model,
)
# fmt: off
# TODO: Remove this once we have a proper transformers package
from transformers import AutoConfig, PretrainedConfig # isort: skip
class ExaoneMoEConfig(PretrainedConfig):
model_type = "exaone_moe"
logger.warning_once(
"transformers does not support 'ExaoneMoEConfig'. "
"Register ExaoneMoEConfig to mimic the ExaoneMoE model.",
key="EXAONE_MOE_REGISTER_WARNING"
)
AutoConfig.register(ExaoneMoEConfig.model_type, ExaoneMoEConfig)
# End of the config register.
# fmt: on
def check_is_moe(config: ExaoneMoEConfig, layer_idx: int) -> bool:
"""
Check if the current layer is a MoE layer.
"""
return hasattr(config, "is_moe_layer") and config.is_moe_layer[layer_idx]
def enable_attn_allreduce(mapping: Mapping):
return not mapping.enable_attention_dp or mapping.has_tp()
class ExaoneMoeAttention(QKNormRoPEAttention):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
layer_idx: Optional[int] = None,
fuse_qk_norm_rope: bool = False,
disable_deep_gemm: bool = False,
):
config = model_config.pretrained_config
self.attention_window_size = None
self.is_sliding = config.layer_types[layer_idx] == "sliding_attention"
# NOTE: In ExaoneMoe, only sliding layers apply rope.
pos_embd_params = None
if self.is_sliding:
self.attention_window_size = config.sliding_window
pos_embd_params = PositionalEmbeddingParams(
type=PositionEmbeddingType.rope_gpt_neox,
rope=RopeParams.from_config(config),
)
fuse_qk_norm_rope = self.is_sliding and fuse_qk_norm_rope
# NOTE: Fusing qk norm with rope currently has an issue that slightly hurts accuracy.
assert not fuse_qk_norm_rope, "Fusing qk norm and rope currently has an accuracy issue"
super().__init__(
hidden_size=config.hidden_size,
num_attention_heads=config.num_attention_heads,
num_key_value_heads=config.num_key_value_heads,
max_position_embeddings=config.max_position_embeddings,
bias=False,
pos_embd_params=pos_embd_params,
fuse_qk_norm_rope=fuse_qk_norm_rope,
skip_rope=not self.is_sliding,
layer_idx=layer_idx,
dtype=config.torch_dtype,
config=model_config,
disable_deep_gemm=disable_deep_gemm,
reduce_output=enable_attn_allreduce(model_config.mapping),
)
def forward(
self,
position_ids: Optional[torch.LongTensor],
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
attention_mask: PredefinedAttentionMask = PredefinedAttentionMask.CAUSAL,
lora_params: Optional[dict] = None,
**kwargs,
) -> torch.Tensor:
return super().forward(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
attention_mask=attention_mask,
lora_params=lora_params,
attention_window_size=self.attention_window_size,
**kwargs,
)
class ExaoneMoeSparseMoEBlock(Deepseekv3MoE):
"""
ExaoneMoe Sparse MoE Block Layer.
It follows DeepSeek-V3 implementation.
"""
class ExaoneMoeDecoderLayer(DecoderLayer):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
aux_stream_dict: Dict[AuxStreamType, torch.cuda.Stream],
layer_idx: int,
):
super().__init__()
self.model_config = model_config
config = model_config.pretrained_config
self.layer_idx = layer_idx
self.mapping = model_config.mapping
mapping = self.mapping
self.enable_attention_dp = mapping.enable_attention_dp
self.mlp_tp_size = mapping.tp_size
self.is_p2p_supported = can_access_peer(mapping)
self.fusion_config = EagerFusionConfig()
# MoE fusions are disabled by default in K-EXAONE since
# they may cause a slight accuracy drop due to a numerical gap.
self.enable_fusion = os.environ.get("TRTLLM_EXAONE_EAGER_FUSION_ENABLED", "0") == "1"
self.enable_fusion &= not self.enable_attention_dp
# FIXME: incompatible with mixed quantization mode
quant_config = self._get_decoder_layer_quant_config(model_config, layer_idx)
self.is_nvfp4 = quant_config.layer_quant_mode.has_nvfp4()
assert quant_config.quant_algo is not QuantAlgo.MIXED_PRECISION, (
"MIXED_PRECISION is ambiguous"
)
self.allreduce = None
self.moe_allreduce = None
if not self.enable_attention_dp and self.mapping.tp_size > 1:
self.allreduce = AllReduce(
mapping=model_config.mapping,
strategy=model_config.allreduce_strategy,
dtype=config.torch_dtype,
)
self.moe_allreduce = MoEAllReduce(self.mapping)
has_tp = mapping.has_tp()
has_pp = mapping.has_pp()
# Submodule definitions
self.input_layernorm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
self.self_attn = ExaoneMoeAttention(model_config, layer_idx=layer_idx)
# MoE or Dense layer
self.is_moe_layer = check_is_moe(config, layer_idx)
if self.is_moe_layer:
self.fusion_config.PRE_MOE_FUSION = self.enable_fusion and has_tp
self.fusion_config.POST_MOE_FUSION = self.fusion_config.PRE_MOE_FUSION and not has_pp
self.mlp = ExaoneMoeSparseMoEBlock(
num_experts=config.num_experts,
top_k=config.num_experts_per_tok,
hidden_size=config.hidden_size,
intermediate_size=config.moe_intermediate_size,
shared_expert_intermediate_size=config.moe_intermediate_size
* config.num_shared_experts,
dtype=config.torch_dtype,
model_config=model_config,
override_quant_config=quant_config,
aux_stream_dict=aux_stream_dict,
layer_idx=layer_idx,
)
else:
block_size = 1
if quant_config.quant_algo is None and quant_config.group_size is not None:
block_size = quant_config.group_size
self.mlp_tp_size = self._compute_mlp_tp_size(config.intermediate_size, block_size)
has_mlp_tp = self.mlp_tp_size > 1
self.fusion_config.PRE_MLP_FUSION = self.enable_fusion and has_mlp_tp and self.is_nvfp4
self.fusion_config.POST_MLP_FUSION = self.enable_fusion and has_mlp_tp
self.mlp = GatedMLP(
hidden_size=config.hidden_size,
intermediate_size=config.intermediate_size,
bias=False,
dtype=config.torch_dtype,
config=model_config,
# Keep sharding consistent with computed mlp_tp_size.
# In attention-DP, mlp_tp_size==1 -> disable TP sharding here.
overridden_tp_size=self.mlp_tp_size,
layer_idx=layer_idx,
reduce_output=has_mlp_tp,
)
self.disable_attn_allreduce = (
self.fusion_config.PRE_MOE_FUSION
or self.fusion_config.PRE_MLP_FUSION
or self.mapping.tp_size == 1
or self.enable_attention_dp
)
self.post_attention_layernorm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
self.next_layer_layernorm: RMSNorm = None
def _get_decoder_layer_quant_config(
self, model_config: ModelConfig[ExaoneMoEConfig], layer_idx: int
):
"""
Some layers (e.g., the MTP layer in an nvfp4 checkpoint) are left
unquantized. Because the TRTLLM moe_backend only supports fp8/fp4
quantization, the quant_config must be overridden for such layers.
"""
quant_config = model_config.quant_config
layer_name = f"model.layers.{layer_idx}"
if quant_config.is_module_excluded_from_quantization(layer_name):
return QuantConfig(
quant_algo=None,
kv_cache_quant_algo=quant_config.kv_cache_quant_algo,
)
else:
return model_config.quant_config
def _compute_mlp_tp_size(self, intermediate_size: int, block_size: int) -> int:
"""Adopted from DeepseekV3DecoderLayer._compute_mlp_tp_size."""
assert intermediate_size % block_size == 0, (
f"intermediate_size {intermediate_size} must be divisible by block_size {block_size}."
)
if self.enable_attention_dp:
# If using attention DP, the MLP also uses DP instead of TP.
mlp_tp_size = 1
else:
# The two math.gcd operations ensure that mlp_tp_size falls in the candidate TP sizes.
tp = math.gcd(
intermediate_size // block_size,
self.mapping.tp_size,
)
if tp > self.mapping.gpus_per_node:
mlp_tp_size = math.gcd(
tp,
self.mapping.gpus_per_node,
) # Avoid costly inter-node TP
else:
mlp_tp_size = tp
return mlp_tp_size
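# Hedged worked example (hypothetical sizes): with intermediate_size=28672,
# block_size=128, tp_size=16, and gpus_per_node=8:
#   tp = gcd(28672 // 128, 16) = gcd(224, 16) = 16
#   16 > 8, so mlp_tp_size = gcd(16, 8) = 8, keeping MLP TP within one node.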
def forward(
self,
position_ids: torch.LongTensor,
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
residual: Optional[torch.Tensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
# LN has already been applied in the previous layer, except for the first layer.
if residual is None:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
hidden_states = self.self_attn(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
all_reduce_params=AllReduceParams(enable_allreduce=not (self.disable_attn_allreduce)),
**kwargs,
)
if self.is_moe_layer:
hidden_states, residual = self.forward_moe(
hidden_states=hidden_states,
attn_metadata=attn_metadata,
residual=residual,
)
else:
hidden_states, residual = self.forward_mlp(
hidden_states=hidden_states,
residual=residual,
)
return hidden_states, residual
def forward_moe(
self,
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
residual: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
def _run_moe(hidden_states, hidden_states_fp4, do_finalize):
return self.mlp(
hidden_states,
hidden_states_fp4,
all_rank_num_tokens=attn_metadata.all_rank_num_tokens,
final_all_reduce_params=AllReduceParams(
enable_allreduce=not (
self.fusion_config.POST_MOE_FUSION or self.mapping.tp_size == 1
)
),
do_finalize=do_finalize,
)
if self.fusion_config.PRE_MOE_FUSION:
# moe_backend can be either CUTLASS or TRTLLM here
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.post_attention_layernorm.weight,
eps=self.post_attention_layernorm.variance_epsilon,
trigger_completion_at_end=False,
),
)
else:
# No fusion
hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
# Note: this fusion pattern is only supported for single-node TRTLLM-nvfp4 backend now
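# do_finalize=False defers the MoE finalize step so it can be fused into the
# MoEAllReduce below; every condition in the conjunction must hold for that
# deferred path to be taken.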
do_finalize = self.mapping.is_multi_node() or (
not (
self.fusion_config.POST_MOE_FUSION
and hidden_states.shape[0] <= self.moe_allreduce.max_token
and self.model_config.moe_backend == "TRTLLM"
and self.mlp.experts.has_nvfp4
and self.is_p2p_supported
)
)
hidden_states = _run_moe(hidden_states, hidden_states_fp4=None, do_finalize=do_finalize)
if self.fusion_config.POST_MOE_FUSION:
if do_finalize:
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
trigger_completion_at_end=False,
),
)
else:
assert len(hidden_states) == 4, "hidden_states must have 4 elements"
shared_output = hidden_states[0]
fc2_output = hidden_states[1]
expert_scale_factor = hidden_states[2]
expanded_idx_to_permuted_idx = hidden_states[3]
moe_all_reduce_params = MoEAllReduceParams(
expanded_idx_to_permuted_idx=expanded_idx_to_permuted_idx,
expert_scale_factor=expert_scale_factor,
shared_expert_output=shared_output,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
is_cutlass_min_latency=False,
)
hidden_states, residual = self.moe_allreduce(
fc2_output, all_reduce_params=moe_all_reduce_params
)
elif self.next_layer_layernorm is not None:
hidden_states, residual = self.next_layer_layernorm(hidden_states, residual)
return hidden_states, residual
def forward_mlp(
self,
hidden_states: torch.Tensor,
residual: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
if self.fusion_config.PRE_MLP_FUSION:
act_fp4, act_sf, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4,
residual=residual,
norm_weight=self.post_attention_layernorm.weight,
scale=self.mlp.gate_up_proj.input_scale,
eps=self.post_attention_layernorm.variance_epsilon,
),
)
hidden_states = Fp4QuantizedTensor(act_fp4, act_sf)
else:
hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
hidden_states = self.mlp(
hidden_states,
final_all_reduce_params=AllReduceParams(
enable_allreduce=not (self.fusion_config.POST_MLP_FUSION or self.mlp_tp_size == 1)
),
)
if self.fusion_config.POST_MLP_FUSION:
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
),
)
elif self.next_layer_layernorm is not None:
hidden_states, residual = self.next_layer_layernorm(hidden_states, residual)
return hidden_states, residual
class ExaoneMoeModel(DecoderModel):
def __init__(self, model_config: ModelConfig[ExaoneMoEConfig]):
super().__init__(model_config)
config = self.model_config.pretrained_config
self.num_hidden_layers = config.num_hidden_layers
self.embed_tokens = Embedding(
config.vocab_size,
config.hidden_size,
dtype=config.torch_dtype,
mapping=model_config.mapping,
tensor_parallel_mode=TensorParallelMode.COLUMN,
gather_output=True,
)
aux_stream_list = [torch.cuda.Stream() for _ in range(3)]
self.aux_stream_dict = {
AuxStreamType.Attention: aux_stream_list[0],
AuxStreamType.MoeShared: aux_stream_list[0],
AuxStreamType.MoeChunkingOverlap: aux_stream_list[1],
AuxStreamType.MoeBalancer: aux_stream_list[2],
}
self.layers = nn.ModuleList(
[
ExaoneMoeDecoderLayer(
model_config=model_config,
aux_stream_dict=self.aux_stream_dict,
layer_idx=layer_idx,
)
for layer_idx in range(self.num_hidden_layers)
]
)
self.norm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
def forward(
self,
attn_metadata: AttentionMetadata,
input_ids: Optional[torch.LongTensor] = None,
position_ids: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
lora_params=None,
**kwargs,
) -> torch.Tensor | Tuple[torch.Tensor, Optional[torch.Tensor]]:
if (input_ids is None) ^ (inputs_embeds is not None):
raise ValueError(
"You cannot specify both input_ids and inputs_embeds at "
"the same time, and must specify either one."
)
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids)
hidden_states = inputs_embeds.to(self.dtype)
residual = None
for decoder_layer in self.layers[: self.num_hidden_layers]:
hidden_states, residual = decoder_layer(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
residual=residual,
lora_params=lora_params,
)
# The final LN has already been applied in the last decoder layer as part of the fusion.
return hidden_states
@register_auto_model("ExaoneMoEForCausalLM")
class ExaoneMoeForCausalLM(DecoderModelForCausalLM[ExaoneMoeModel, ExaoneMoEConfig]):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
):
super().__init__(
ExaoneMoeModel(model_config),
config=model_config,
hidden_size=model_config.pretrained_config.hidden_size,
vocab_size=model_config.pretrained_config.vocab_size,
)
def load_weights(
self,
weights: Dict,
weight_mapper: Optional["BaseWeightMapper"] = None, # noqa: F821
skip_modules: Optional[List[str]] = None,
allow_partial_loading: bool = False,
):
# MoE naming pattern.
moe_weight_patterns = {
"gate_proj": "w1",
"up_proj": "w3",
"down_proj": "w2",
}
module_names = list(weights)
for name in module_names:
if "mlp.e_score_correction_bias" in name:
# Move bias into the gate module.
new_name = name.replace(
"mlp.e_score_correction_bias", "mlp.gate.e_score_correction_bias"
)
else:
# MoE Weight Remapping.
new_name = name
for k, v in moe_weight_patterns.items():
pattern = rf"(experts\.\d+\.){k}\b"
new_name = re.sub(pattern, rf"\1{v}", new_name)
# Remap the name-parameter pair if needed.
if new_name != name:
weights[new_name] = weights.pop(name)
super().load_weights(
weights=weights,
weight_mapper=weight_mapper,
skip_modules=skip_modules or [],
allow_partial_loading=allow_partial_loading,
)
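# Hedged illustration of the MoE weight remapping above (hypothetical weight name):
#   re.sub(r"(experts\.\d+\.)gate_proj\b", r"\1w1",
#          "model.layers.3.mlp.experts.7.gate_proj.weight")
#   -> "model.layers.3.mlp.experts.7.w1.weight"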
def post_load_weights(self):
# For the cross-layer residual+LN fusion.
for idx, layer in enumerate(self.model.layers[: self.config.num_hidden_layers]):
if idx == self.config.num_hidden_layers - 1:
layer.next_layer_layernorm = self.model.norm
else:
layer.next_layer_layernorm = self.model.layers[idx + 1].input_layernorm
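To make the wiring above concrete, here is a minimal standalone sketch (toy modules, not the TRT-LLM implementation) of the next_layer_layernorm pattern: each layer applies the *next* layer's input LayerNorm together with its residual add, and the last layer applies the final model norm, so no layer needs its own pre-LN after the first.

import torch
from torch import nn

class ToyLayer(nn.Module):
    def __init__(self, hidden: int):
        super().__init__()
        self.input_layernorm = nn.LayerNorm(hidden)
        self.mlp = nn.Linear(hidden, hidden)
        self.next_layer_layernorm = None  # wired after weight loading

    def forward(self, hidden_states, residual):
        hidden_states = self.mlp(hidden_states)
        residual = residual + hidden_states  # residual add ...
        hidden_states = self.next_layer_layernorm(residual)  # ... fused with the next layer's LN
        return hidden_states, residual

hidden = 16
layers = [ToyLayer(hidden) for _ in range(3)]
final_norm = nn.LayerNorm(hidden)
for idx, layer in enumerate(layers):  # mirrors post_load_weights
    layer.next_layer_layernorm = (
        final_norm if idx == len(layers) - 1 else layers[idx + 1].input_layernorm
    )

x = torch.randn(2, hidden)
out, res = layers[0].input_layernorm(x), x  # only the first layer applies its own LN
for layer in layers:
    out, res = layer(out, res)
print(out.shape)  # torch.Size([2, 16])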

Some files were not shown because too many files have changed in this diff.