Merge branch 'main' into update_mnnvl_test

Kaiyu Xie, 2026-01-12 17:16:56 +08:00, committed by GitHub
commit bfd34f19b3
156 changed files with 4659 additions and 2308 deletions


@ -129,6 +129,18 @@ static_assert(SPEC_DEC, "SPEC_Q_SEQ_LEN should only be used when SPEC_DEC is ena
#define SLIDING_WINDOW 0
#endif
#ifndef SKIP_SOFTMAX_ATTN
#define SKIP_SOFTMAX_ATTN 0
#endif
#ifndef SKIP_SOFTMAX_ATTN_BLOCK_STATS
#define SKIP_SOFTMAX_ATTN_BLOCK_STATS 0
#endif
#ifndef SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
#define SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE 1
#endif
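// SKIP_SOFTMAX_ATTN lets the gemm0 warps vote to skip the softmax + XV GEMM for a KV tile whose column
// max falls far below the running max (see computeWarpGrpColMax_sync in mha_sm90.cu).
// SKIP_SOFTMAX_ATTN_BLOCK_STATS adds skipped/total block counters for measurement.
// SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE adds an extra atomicMax pass plus a barrier so that
// thresholds > 1 are handled correctly (the reduced-sync path breaks in that case).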
// 0 - no PDL
// 1 - naive PDL
// 2 - aggressive PDL (implemented only in mha_sm90.cu for now)


@ -106,6 +106,7 @@ __device__ inline MatDesc makeMatDesc(void const* data, uint32_t dimKByteOffset,
asm volatile("trap;\n");
return 0;
}();
assert(__cvta_generic_to_shared(data) % baseAlign == 0);
uint32_t const baseOffset = ((patternAddr % baseAlign == 0) ? 0U : ((patternAddr >> 0x7) & 0x7));
return MatDesc{
/*addr=*/MatDesc::encode(__cvta_generic_to_shared(data)),


@ -2734,6 +2734,25 @@ static constexpr auto kernel_mha = kernel_mha_impl;
#endif
#ifndef GENERATE_CUBIN
uint32_t computeNbSubSeqPerSeqMHA(cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen)
{
if (!allowMultiBlockMode)
{
return 1;
}
auto const env = std::getenv("XQA_NB_SUB_SEQ");
if (env != nullptr)
{
int32_t const val = std::stoi(env);
if (val > 0)
{
return val;
}
}
return std::min<uint32_t>(
std::max<uint32_t>(1U, prop.multiProcessorCount / (batchSize * nbKHeads)), divUp(maxSeqLen, ctaTile.x));
}
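// Note: the XQA_NB_SUB_SEQ environment variable above overrides this heuristic, e.g.
//   XQA_NB_SUB_SEQ=4 <test binary>   (binary name illustrative)
// Otherwise the sub-sequence count balances SM occupancy (multiProcessorCount / (batchSize * nbKHeads))
// against the number of CTA tiles needed to cover maxSeqLen.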
void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -2771,6 +2790,13 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor, // for compatibility with mha_sm90.cu only
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, // for compatibility with mha_sm90.cu only
uint32_t* __restrict__ totalBlockCount, // for compatibility with mha_sm90.cu only
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream)
{
@ -2793,24 +2819,7 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
uint32_t const nbQHeads = nbKHeads * headGrpSize;
// const uint32_t nbSubSeqPerSeq = allowMultiBlockMode ? DBG_NB_CTAS_PER_SEQ : 1;
uint32_t const nbSubSeqPerSeq = computeNbSubSeqPerSeqMHA(prop, batchSize, nbKHeads, maxSeqLen);
// gridDim.z == batchSize && gridDim.y == nbKHeads && gridDim.x == nbSubSeqPerSeq
#if SPEC_DEC
const uint32_t nbTokenBlocksPerGrp = divUp(qSeqLen * headGrpSize, rowsPerBlock);


@ -90,6 +90,9 @@ struct BeamSearchParams
// match trt-llm API.
};
uint32_t computeNbSubSeqPerSeqMHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen);
void launchMHA(cudaDeviceProp const& prop, uint32_t const nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -127,9 +130,18 @@ void launchMHA(cudaDeviceProp const& prop, uint32_t const nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream);
uint32_t computeNbSubSeqPerSeqHopperF8MHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen);
void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -167,6 +179,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream);


@ -49,6 +49,10 @@ static_assert(specDecQLen * headGrpSize <= 32, "SPEC_Q_SEQ_LEN macro value is to
#define SWAP_AB (!SPEC_DEC)
#endif
#if SKIP_SOFTMAX_ATTN
static_assert(SWAP_AB && USE_PAGED_KV_CACHE && !SPEC_DEC && BEAM_WIDTH == 1, "SKIP_SOFTMAX_ATTN requires SWAP_AB, a paged KV cache, no SPEC_DEC, and BEAM_WIDTH == 1.");
#endif
#define IS_SUPPORTED_F16_CASE (CACHE_ELEM_ENUM == 0 && !SPEC_DEC && SWAP_AB && !USE_INPUT_KV && !LOW_PREC_OUTPUT)
inline constexpr bool swapAB = SWAP_AB;
@ -138,26 +142,38 @@ using PaddedOutHead = PaddedInputHead;
struct alignas(128) SharedMem
{
using QBuffer = Vec<Array2D<LdGrain, ctaNbQHeads, grainsPerQPart>, nbQParts>;
using KBuffer = Array2D<LdGrain, gemm0CtaTileNbTokens, exactDiv(cacheHeadPartBytes, grainBytes)>;
using XBuffer = Vec<Array2D<LdGrain, ctaNbQHeads, grainsPerXPart>, nbXParts>;
using VBuffer = Vec<Array2D<LdGrain, gemm1CtaTileNbTokens, exactDiv(cacheHeadPartBytes, grainBytes),
sizeof(XBuffer) % (cacheHeadPartBytes * 8) == 0>,
cacheHeadNbParts>;
#if !SWAP_AB
using VTBuffer = Array2D<LdGrain, headElems, exactDiv(gemm1CtaTileNbTokens, cacheElemsPerGrain), true>;
#endif
#if CACHE_ELEM_ENUM == 0
using OutSwizzleBuf = Array2D<LdGrain, ctaNbQHeads, grainsPerPaddedInputHead>;
#elif CACHE_ELEM_ENUM == 2
using OutSwizzleBuf = Array2D<Vec<Vec<InputElem, 4>, 4>, ctaNbQHeads, exactDiv(headElems, 4 * 4)>;
#endif
#if SKIP_SOFTMAX_ATTN
static constexpr uint32_t nbKBuf = 2;
static constexpr uint32_t nbVBuf = 3; // @fixme: skip softmax attn needs one extra VBuffer
static constexpr uint32_t nbXBuf
= 3 * (gemm0CtaTileNbTokens >= gemm1CtaTileNbTokens ? 1 : exactDiv(gemm1CtaTileNbTokens, gemm0CtaTileNbTokens));
#else
static constexpr uint32_t nbKBuf = 2;
static constexpr uint32_t nbVBuf = 2;
static constexpr uint32_t nbXBuf
= 2 * (gemm0CtaTileNbTokens >= gemm1CtaTileNbTokens ? 1 : exactDiv(gemm1CtaTileNbTokens, gemm0CtaTileNbTokens));
#endif
static_assert(nbXBuf == nbVBuf);
// note: buffers used for GMMA may have additional alignment requirements
KBuffer k[nbKBuf]; // as is loaded from global mem.
QBuffer q; // For gmma math. Conversion done if needed.
union ReusedXVOutSwizzleBuf
{
struct XV
@ -196,9 +212,6 @@ struct alignas(128) SharedMem
return reusedXVOutSwizzleBuf[i].outSwizzle;
}
// @fixme: move these into reusedXVOutSwizzleBuf
#if SWAP_AB
ShmQWiseVec xColMax[nbXBuf];
@ -220,6 +233,11 @@ struct alignas(128) SharedMem
Vec<KVCachePageIndex, nbPagesPerTile> pages[2]; // one for K and one for V
#endif
#if SKIP_SOFTMAX_ATTN
uint32_t skipSoftmaxVotesGemm0ToV[nbXBuf]; // guarded by skipSoftmaxXBar
uint32_t skipSoftmaxVotesGemm0ToGemm1[nbXBuf]; // guarded by xBar
#endif
// mem barriers
CtaBarrierPair qBar;
@ -229,6 +247,9 @@ struct alignas(128) SharedMem
CtaBarrierPair vtBar[nbVBuf];
#endif
CtaBarrierPair xBar[nbXBuf];
#if SKIP_SOFTMAX_ATTN
CtaBarrierPair skipSoftmaxXBar[nbXBuf]; // for V to wait for X to be ready
#endif
// used internally in the gemm0 warp group
// @fixme: use separate arrive and wait for all usage
@ -425,8 +446,13 @@ __device__ void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
#endif
#if SWAP_AB
#if SKIP_SOFTMAX_ATTN
__device__ RegColWiseVec computeWarpGrpColMax_sync(CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src,
float skipSoftmaxThreshold, uint32_t* smemSkipVote, bool maybeSkip);
#else
__device__ RegColWiseVec computeWarpGrpColMax_sync(
CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src);
#endif
__device__ void warpGrpApplyMask(uint32_t warpRank, Gemm0Acc& acc, uint32_t validRowBeg, uint32_t validRowEnd);
__device__ void warpGrpOnlineSoftmax(Gemm0Acc& acc, RegColWiseVec const& colMax);
__device__ RegColWiseVec computeWarpColSum(Gemm0Acc& src);
@ -675,6 +701,12 @@ CUBIN_EXPORT __global__
#endif
#if SPEC_DEC
SpecDecParams const specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* __restrict__ const semaphores
= nullptr, // [nbReq][nbKHeads][divUp(specDecParams.qSeqLen, inputTokensPerCta)]
@ -753,6 +785,10 @@ CUBIN_EXPORT __global__
uint32_t const nbSubSeq = isMultiBlockMode ? mha::min(nbTilesInUse / multiBlockMinNbTilesPerCta, maxNbSubSeq) : 1;
static_assert(multiBlockMinNbTiles >= multiBlockMinNbTilesPerCta * 2);
assert(isMultiBlockMode == (nbSubSeq > 1));
#if SKIP_SOFTMAX_ATTN
bool const disableSkipForShortSeq = (cacheSeqLen < skipSoftmaxThresholdScaleFactor);
float const skipSoftmaxThreshold = disableSkipForShortSeq ? 0.0f : skipSoftmaxThresholdScaleFactor / cacheSeqLen;
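// Worked example (illustrative values, cf. the unit tests): with skipSoftmaxThresholdScaleFactor = 500,
// cacheSeqLen = 4096 gives skipSoftmaxThreshold = 500 / 4096 ~= 0.122, while cacheSeqLen = 400 (< 500)
// disables skipping for that sequence entirely.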
#endif
if (idxSubSeq >= nbSubSeq)
{
return;
@ -776,21 +812,34 @@ CUBIN_EXPORT __global__
assert(dynamicSmemSize() >= sizeof(SharedMem));
SharedMem& smem = *reinterpret_cast<SharedMem*>(&smemByteBuf[0]);
constexpr uint32_t maxNbBuffers = (SharedMem::nbXBuf > SharedMem::nbVBuf) ? SharedMem::nbXBuf : SharedMem::nbVBuf;
static_assert(
    maxNbBuffers >= SharedMem::nbKBuf && maxNbBuffers >= SharedMem::nbVBuf && maxNbBuffers >= SharedMem::nbXBuf);
if (wid < maxNbBuffers)
{
    if (warpElectSync())
    {
        if (wid < SharedMem::nbKBuf)
        {
            smem.kBar[wid].initialize(gemm0NbThrds, gemm0NbThrds + warp_size);
        }
        if (wid < SharedMem::nbXBuf)
        {
#if SKIP_SOFTMAX_ATTN
            smem.skipSoftmaxXBar[wid].initialize(gemm0NbThrds + warp_size, gemm0NbThrds + warp_size);
            smem.vBar[wid].initialize(gemm1NbThrds + warp_size, gemm1NbThrds + warp_size);
#else
            smem.vBar[wid].initialize(gemm1NbThrds, gemm1NbThrds + warp_size);
#endif
#if !SWAP_AB
            smem.vtBar[wid].initialize(gemm1NbThrds * 2, gemm1NbThrds * 2);
#endif
            smem.xBar[wid].initialize(gemm0NbThrds + gemm1NbThrds, gemm0NbThrds + gemm1NbThrds);
        }
    }
}
else if (wid == maxNbBuffers)
{
if (warpElectSync())
{
@ -819,6 +868,10 @@ CUBIN_EXPORT __global__
SpecDec const specDec{specDecParams, idxReq, idxInputSubSeq, cacheSeqLen};
#endif
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t localSkippedBlockCount = 0;
#endif
// QK gemm
constexpr uint32_t nbGmmaInstM = exactDiv(gemm0CtaTileNbTokens, gmma::instM);
using Acc = GmmaAcc<gemm0CtaTileNbTokens, ctaNbQHeads>;
@ -940,10 +993,39 @@ CUBIN_EXPORT __global__
}
}
#endif
uint32_t const idxXBuf = idxIter % SharedMem::nbXBuf;
auto& xBar = smem.xBar[idxXBuf];
// update colMax in shared mem and get a register copy
#if SWAP_AB
#if SKIP_SOFTMAX_ATTN
auto& skipSoftmaxXBar = smem.skipSoftmaxXBar[idxXBuf];
skipSoftmaxXBar.consumed.arrive_and_wait();
bool const maybeSkip = !disableSkipForShortSeq && idxIter != 0;
RegColWiseVec const colMax = computeWarpGrpColMax_sync(smem.gemm0WarpGrpBar, smem.gemm0CurrentSeqMax, acc,
skipSoftmaxThreshold, &smem.skipSoftmaxVotesGemm0ToV[idxXBuf], maybeSkip);
bool const shouldSkipSoftmaxAttn = static_cast<bool>(smem.skipSoftmaxVotesGemm0ToV[idxXBuf]);
unused(skipSoftmaxXBar.produced.arrive());
warpGrpOnlineSoftmax(acc, colMax);
if (shouldSkipSoftmaxAttn)
{
xBar.consumed.arrive_and_wait();
if (threadIdx.x == 0)
{
smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf] = 1U;
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
localSkippedBlockCount++;
#endif
}
asm volatile("fence.proxy.async.shared::cta;\n"); // maybe not used
unused(xBar.produced.arrive());
continue;
}
#else
RegColWiseVec const colMax = computeWarpGrpColMax_sync(smem.gemm0WarpGrpBar, smem.gemm0CurrentSeqMax, acc);
warpGrpOnlineSoftmax(acc, colMax);
#endif
#else
RegRowWiseVec const rowMax = computeWarpGrpRowMax_sync(warpRank, smem.gemm0CurrentSeqMax, acc);
warpGrpOnlineSoftmax(acc, rowMax);
@ -959,8 +1041,6 @@ CUBIN_EXPORT __global__
// map 1 to fp8_max before conversion to fp8
acc = acc * kE4M3_MAX;
// @fixme: for fp16/bf16, try not to transpose acc here, and leave it to the next GEMM.
#if SWAP_AB
storeGemm0AccToShm(warpRank, laneId(), smem.xBuf(idxXBuf), xBar.consumed, acc);
@ -989,13 +1069,25 @@ CUBIN_EXPORT __global__
storeShmRowWiseVec(warpRank, smem.xRowMax[idxXBuf], rowMax);
storeShmRowWiseVec(warpRank, smem.xRowSum[idxXBuf], rowSum);
#endif
#if SKIP_SOFTMAX_ATTN
if (threadIdx.x == 0)
{
smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf] = 0;
}
#endif
__syncwarp();
// the release semantics of arrive does not work for async consumers like gmma. additional fence is
// needed.
asm volatile("fence.proxy.async.shared::cta;\n");
unused(xBar.produced.arrive());
}
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
if (threadIdx.x == 0 && skippedBlockCount != nullptr && totalBlockCount != nullptr)
{
atomicAdd(skippedBlockCount, localSkippedBlockCount);
atomicAdd(totalBlockCount, nbIters);
}
#endif
unused(smem.qBar.consumed.arrive());
}
else if (warpIdx.z == 1)
@ -1043,216 +1135,231 @@ CUBIN_EXPORT __global__
uint32_t idxVTile = idxVTileInit + idxIter * nbSubSeq;
auto const idxVBuf = idxIter % SharedMem::nbVBuf;
auto const idxXBuf = idxVBuf;
auto& xBar = smem.xBar[idxXBuf];
auto& vBar = smem.vBar[idxVBuf];
auto const& vBuf = smem.vBuf(idxVBuf);
xBar.produced.arrive_and_wait();
#if SKIP_SOFTMAX_ATTN
bool shouldSkipSoftmaxAttn = smem.skipSoftmaxVotesGemm0ToGemm1[idxXBuf]; // guarded by xBar
if (shouldSkipSoftmaxAttn)
{
    vBar.produced.arrive_and_wait();
}
#endif
#if SKIP_SOFTMAX_ATTN
if (!shouldSkipSoftmaxAttn) // skip XVGemm
#endif
{
    arrive_tx_and_wait(vBar.produced, exactDiv(sizeof(SharedMem::VBuffer), gemm1NbThrds));
#if !SWAP_AB
    CtaBarrierPair& vtBar = smem.vtBar[idxVBuf];
    auto& vtBuf = smem.vtBuf(idxVBuf);
    vtBar.consumed.arrive_and_wait();
    transposeVTile(warpRank, laneId(), vtBuf, vBuf);
    vBar.consumed.arrive();
    vtBar.produced.arrive();
#endif
#if !defined(NDEBUG) && DBG_PRINT
#if SWAP_AB
    if (threadIdx.x == 0)
    {
        printf("colMax:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xColMax[idxXBuf][i]);
        }
        printf("\n");
        printf("colSum:\n");
        for (int n = 0; n < 4; n++)
        {
            for (int i = 0; i < ctaNbQHeads; i++)
            {
                printf("%f, ", smem.xColSum[idxXBuf][n][i]);
            }
            printf("\n");
        }
        printf("\n");
        printf("X:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            for (int j = 0; j < gemm0CtaTileNbTokens; j++)
            {
                auto const& elemsPerXPart = (cacheElemsPerGrain * grainsPerXPart);
                auto const e = reinterpret_cast<Vec<__nv_fp8_e4m3, 16>&>(
                    smem.xBuf(idxXBuf)[j / elemsPerXPart].template at<true>(
                        i, j % elemsPerXPart / cacheElemsPerGrain))[j % cacheElemsPerGrain];
                printf("%.2f, ", float(e));
                if (j % 16 == 15)
                {
                    printf("| ");
                }
            }
            printf("\n\n");
        }
    }
    smem.gemm1WarpGrpBar.arrive_and_wait();
#else
    if (blockIdx.y == 1 && threadIdx.x == 0)
    {
        printf("rowMax:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xRowMax[idxXBuf][i]);
        }
        printf("\n");
        printf("rowSum:\n");
        for (int i = 0; i < ctaNbQHeads; i++)
        {
            printf("%f, ", smem.xRowSum[idxXBuf][i]);
        }
        printf("\n");
    }
    smem.gemm1WarpGrpBar.arrive_and_wait();
#endif
#endif
#if SWAP_AB
    // @fixme: if first tile, no need to rescale acc. For persistent CTA, just re-initialize acc instead.
    rescaleGemm1AccForNewColMax_sync(warpRank, smem.xColMax[idxXBuf], smem.xColSum[idxXBuf],
        smem.gemm1AccColMax, acc, smem.gemm1AccColSum, smem.gemm1WarpGrpBar);
#else
    rescaleGemm1AccForNewRowMax_sync(warpRank, smem.xRowMax[idxXBuf], smem.xRowSum[idxXBuf],
        smem.gemm1AccColMax, acc, smem.gemm1AccColSum);
#endif
    auto& xBuf = smem.xBuf(idxXBuf);
    auto const descXBase = gmma::makeMatDesc(nullptr, 0, SharedMem::XBuffer::Elem::rowBytes * 8,
        gmma::getSwizzleMode<true>(SharedMem::XBuffer::Elem{}))
                               .raw();
#if CACHE_ELEM_ENUM == 0
    auto const descVBase = gmma::makeMatDesc(nullptr, 0, SharedMem::VBuffer::Elem::rowBytes * 8,
        gmma::getSwizzleMode<true>(SharedMem::VBuffer::Elem{}))
                               .raw();
#endif
#if SWAP_AB
    //@fixme: to reduce code size, we can disable unroll and use double-buffer for LDSM in loadVTileTransposed.
#pragma unroll
    for (uint32_t idxInstK = 0; idxInstK < gemm1NbGmmaInstK; idxInstK++)
    {
#if CACHE_ELEM_ENUM == 2
        Vec<RegMatAFrag, gemm1NbGmmaInstM> const fragA
            = loadVTileTransposed(warpRank, laneId(), vBuf, idxInstK);
#if !defined(NDEBUG) && DBG_PRINT
        if (threadIdx.x == 0)
        {
            printf("fragA:\nidxInstK == %u\n", idxInstK);
        }
        smem.gemm1WarpGrpBar.arrive_and_wait();
        for (int m = 0; m < 2; m++)
        {
            for (int w = 0; w < 4; w++)
            {
                if (warpRank == w)
                {
                    if (laneId() == 0)
                    {
                        printf("  warpRank = %u\n", warpRank);
                    }
                    __syncwarp();
                    for (int a = 0; a < 2; a++)
                    {
                        for (int b = 0; b < 8; b++)
                        {
                            for (int c = 0; c < 2; c++)
                            {
                                for (int d = 0; d < 4; d++)
                                {
                                    if (laneId() == b * 4 + d)
                                    {
                                        for (int e = 0; e < 4; e++)
                                        {
                                            auto const& elem4 = reinterpret_cast<__nv_fp8_e4m3 const(&)[4]>(
                                                fragA[m](0, c)(a, 0));
                                            printf("%.2f, ", float(elem4[e]));
                                        }
                                    }
                                    __syncwarp();
                                }
                            }
                            if (laneId() == 0)
                            {
                                printf("\n");
                            }
                            __syncwarp();
                        }
                        if (laneId() == 0 && a == 0)
                        {
                            printf("----------------------\n");
                        }
                        __syncwarp();
                    }
                }
                smem.gemm1WarpGrpBar.arrive_and_wait();
            }
        }
#endif
#endif
        BoundedVal<grainsPerInstK * gemm1NbGmmaInstK> const kOffsetInGrains{grainsPerInstK * idxInstK};
        auto const descX = addAddr(descXBase,
            &xBuf[kOffsetInGrains.template divBy<SharedMem::XBuffer::Elem::cols>().get()](
                0, kOffsetInGrains.template mod<SharedMem::XBuffer::Elem::cols>().get()));
#if CACHE_ELEM_ENUM == 2
        gmma::fence();
#endif
#pragma unroll
        for (uint32_t idxInstM = 0; idxInstM < gemm1NbGmmaInstM; idxInstM++)
        {
#if CACHE_ELEM_ENUM == 0
            auto const descV
                = addAddr(descVBase, &vBuf[idxInstM](kOffsetInGrains.get() * cacheElemsPerGrain, 0));
            gmma::mma_async_shmA<MathElem, ctaNbQHeads, true, false>(
                reinterpret_cast<float(&)[exactDiv(ctaNbQHeads, gmma::instNBase)][2][2]>(acc(idxInstM, 0)),
                descV, descX, true);
#elif CACHE_ELEM_ENUM == 2
            gmma::mma_async_regA<MathElem, ctaNbQHeads>(
                reinterpret_cast<float(&)[exactDiv(ctaNbQHeads, gmma::instNBase)][2][2]>(acc(idxInstM, 0)),
                reinterpret_cast<uint32_t const(&)[2][2][1]>(fragA[idxInstM]), descX, true);
#endif
        }
        gmma::commit_group();
        //@fixme: delay wait and consumption to next tile. Note that fragA must also persist until finish of
        // gmma.
        gmma::wait_group<0>();
    }
#else
    auto const descVTBase = gmma::makeMatDesc(
        nullptr, 0, SharedMem::VTBuffer::rowBytes * 8, gmma::getSwizzleMode<true>(SharedMem::VTBuffer{}))
                                .raw();
    vtBar.produced.arrive_and_wait();
    // if (idxIter == 1 && threadIdx.x == 0) {
    //     printf("vtBuf:\n");
    //     dbg::printArray2D<__nv_fp8_e4m3, true>(vtBuf);
    // }
#pragma unroll
    for (uint32_t m = 0; m < Gemm1Acc::rows; m++)
    {
#pragma unroll
        for (uint32_t k = 0; k < gemm1NbGmmaInstK; k++)
        {
            BoundedVal<grainsPerInstK * gemm1NbGmmaInstK> const kOffsetInGrains{grainsPerInstK * k};
            auto const descX = addAddr(descXBase,
                &xBuf[kOffsetInGrains.template divBy<SharedMem::XBuffer::Elem::cols>().get()](
                    gmma::instM * m, kOffsetInGrains.template mod<SharedMem::XBuffer::Elem::cols>().get()));
            auto const descVT = addAddr(
                descVTBase, &vtBuf(0, kOffsetInGrains.template mod<SharedMem::VTBuffer::cols>().get()));
            gmma::mma_async_shmA<MathElem, headElems>(
                reinterpret_cast<float(&)[exactDiv(headElems, gmma::instNBase)][2][2]>(acc(m, 0)), descX,
                descVT, true);
        }
    }
    gmma::commit_group();
    //@fixme: delay wait and consumption to next tile. Note that fragA must also persist until finish of
    // gmma.
    gmma::wait_group<0>();
#endif
}
if (idxIter == nbIters - 1)
{
// gmma::wait_group should have already synchronized threads, so this may be unnecessary.
@ -1471,8 +1578,24 @@ CUBIN_EXPORT __global__
tensorMap
#endif
};
#if SKIP_SOFTMAX_ATTN
for (auto& b : smem.skipSoftmaxXBar)
{
unused(b.consumed.arrive());
}
#endif
for (uint32_t idxIter = 0; idxIter < nbIters; idxIter++)
{
uint32_t const idxVBuf = idxIter % SharedMem::nbVBuf;
auto& vBar = smem.vBar[idxVBuf];
#if SKIP_SOFTMAX_ATTN
uint32_t idxXBuf = idxIter % SharedMem::nbXBuf;
auto& skipSoftmaxXBar = smem.skipSoftmaxXBar[idxXBuf];
skipSoftmaxXBar.produced.arrive_and_wait();
bool shouldSkipSoftmaxAttn = smem.skipSoftmaxVotesGemm0ToV[idxXBuf];
skipSoftmaxXBar.consumed.arrive();
#endif
uint32_t const idxVTile = idxVTileInit + idxIter * nbSubSeq;
vTileLoader.loadPages(idxVTile);
#if USE_INPUT_KV || ENABLE_PDL == 2
@ -1506,8 +1629,20 @@ CUBIN_EXPORT __global__
}
#endif
uint32_t const idxVBuf = idxIter % SharedMem::nbVBuf;
auto& vBar = smem.vBar[idxVBuf];
#if SKIP_SOFTMAX_ATTN
if (shouldSkipSoftmaxAttn)
{
vBar.consumed.arrive_and_wait();
// Compared to non-skip softmax attn, we need to increase the vBar.produced count to avoid a race
// where vBar.consumed is arrived again without a wait. Without skip softmax attn, the XVGemm WG
// waits for tx_count, so its progress cannot run ahead of the vload warp. With skip softmax attn,
// the XVGemm WG may run ahead of the vload warp, because the previous vBar counts only XVGemm WG
// threads plus a tx_count (now = 0); it could then arrive vBar.consumed before it is arrive_and_wait-ed.
vBar.produced.arrive();
continue;
}
#endif
vBar.consumed.arrive_and_wait();
if (warpElectSync())
{
@ -1517,6 +1652,9 @@ CUBIN_EXPORT __global__
vTileLoader.loadData(smem.vBuf(idxVBuf)[idxPart], idxVTile, idxPart, vBar.produced);
}
}
#if SKIP_SOFTMAX_ATTN
vBar.produced.arrive();
#endif
__syncwarp();
}
}
@ -1992,9 +2130,23 @@ __device__ inline void warpGrpApplyMask(Gemm0Acc& acc, SpecDec const& specDec,
#endif // SPEC_DEC
// smemColMax is persistent across multiple iterations
#if SKIP_SOFTMAX_ATTN
__device__ inline RegColWiseVec computeWarpGrpColMax_sync(CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax,
Gemm0Acc const& src, float skipSoftmaxThreshold, uint32_t* smemSkipVote, bool maybeSkip)
#else
__device__ inline RegColWiseVec computeWarpGrpColMax_sync(
CtaBarrier& warpGrpBar, ShmQWiseVec& smemColMax, Gemm0Acc const& src)
#endif
{
#if SKIP_SOFTMAX_ATTN
if (threadIdx.x == 0)
{
*smemSkipVote = maybeSkip ? 1U : 0U; // will sync before vote
}
float const lnThreshold
= log(skipSoftmaxThreshold); // this can be -inf, but should be safe as we only use it for comparison
#endif
auto colMax = RegColWiseVec::filled(Vec<float, 2>::filled(safeInitRowMax));
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
@ -2029,6 +2181,9 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
}
uint32_t const lane = laneId();
#if SKIP_SOFTMAX_ATTN
auto prevOrCurrentMax = RegColWiseVec();
#if SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
if (lane < 4)
{
#pragma unroll
@ -2037,12 +2192,43 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
#pragma unroll
for (uint32_t j = 0; j < 2; j++)
{
atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
prevOrCurrentMax[n][j] = smemColMax[8 * n + 2 * lane + j];
}
}
}
warpGrpBar.arrive_and_wait();
#endif
#endif
if (lane < 4)
{
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
{
#pragma unroll
for (uint32_t j = 0; j < 2; j++)
{
#if SKIP_SOFTMAX_ATTN && !SKIP_SOFTMAX_ATTN_FIX_THRESHOLD_GREATER_THAN_ONE
// prevOrCurrentMax <= the actual smemColMax (after updates from all 4 warps are done), but it is
// always >= smemColMax(Prev), the smemColMax value *before* this tile was computed.
// When deciding whether to skip, it is safe to use prevOrCurrentMax: 1) if all 4 warps' local max <
// smemColMax(Prev), then prevOrCurrentMax == smemColMax(Prev) and the result is unaffected; 2) if some
// local max > smemColMax(Prev), then prevOrCurrentMax > smemColMax(Prev) and some warps may incorrectly
// vote to skip, but at least one warp whose local colMax is larger will not skip, so the tile is not
// skipped. This saves some synchronization and checks, but breaks when threshold > 1.
prevOrCurrentMax[n][j] = atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
#else
atomicMax(&smemColMax[8 * n + 2 * lane + j], colMax[n][j]);
#endif
}
}
}
warpGrpBar.arrive_and_wait();
uint32_t const idxInQuad = lane % 4;
#if SKIP_SOFTMAX_ATTN
bool localShouldSkip = true;
#endif
#pragma unroll
for (uint32_t n = 0; n < src.cols; n++)
@ -2050,10 +2236,21 @@ __device__ inline RegColWiseVec computeWarpGrpColMax_sync(
#pragma unroll
for (uint32_t j = 0; j < GmmaAccCoreMat::cols; j++)
{
#if SKIP_SOFTMAX_ATTN
if (lane < 4 && 8 * n + 2 * idxInQuad + j < headGrpSize)
{
localShouldSkip &= (colMax[n][j] - prevOrCurrentMax[n][j]) < lnThreshold;
}
#endif
assert(colMax[n][j] <= smemColMax[8 * n + 2 * idxInQuad + j]);
colMax[n][j] = smemColMax[8 * n + 2 * idxInQuad + j];
}
}
#if SKIP_SOFTMAX_ATTN
atomicAnd(smemSkipVote, static_cast<uint32_t>(localShouldSkip)); // this will be translated to redux and voteu
#endif
warpGrpBar.arrive_and_wait();
return colMax;
}
@ -2199,7 +2396,7 @@ __device__ inline void storeGemm0AccToShm(
uint32_t const idxOctInsideHalf = idxInHalf / 8;
uint32_t const idxRowInsideOct = lane % 8;
uint32_t const warpBaseC = 16 * warpRank;
auto const toAccCoords = [](uint32_t const idxAccCoreMat) -> mha::pair<uint32_t, uint32_t>
{
uint32_t const accR = idxAccCoreMat / Gemm0Acc::cols;
uint32_t const accC = idxAccCoreMat % Gemm0Acc::cols;
@ -3231,6 +3428,24 @@ __device__ inline void storeRotatedPairsForQ(SharedMem::QBuffer& dst,
}
#ifndef GENERATE_CUBIN
uint32_t computeNbSubSeqPerSeqHopperF8MHA(
cudaDeviceProp const& prop, uint32_t batchSize, uint32_t nbKHeads, uint32_t maxSeqLen)
{
auto const env = std::getenv("XQA_NB_SUB_SEQ");
if (env != nullptr)
{
int32_t const val = std::stoi(env);
if (val > 0)
{
return val;
}
}
float const factor = 0.25f;
return mha::min<uint32_t>(
mha::max<uint32_t>(1U, (uint32_t) round(prop.multiProcessorCount * 3 / (batchSize * nbKHeads) * factor)),
divUp(maxSeqLen, gemm0CtaTileNbTokens));
}
void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#if SLIDING_WINDOW
uint32_t slidingWinSize,
@ -3268,6 +3483,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
// int8/fp8 KV cache.
#if SPEC_DEC
SpecDecParams const& specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
float const skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
uint32_t* __restrict__ skippedBlockCount, uint32_t* __restrict__ totalBlockCount,
#endif
#endif
uint32_t* semaphores, void* scratch, cudaStream_t stream)
{
@ -3286,22 +3507,7 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
uint32_t const nbVHeads = nbKHeads;
uint32_t const nbQHeads = nbKHeads * headGrpSize;
uint32_t const nbQKVHeads = nbQHeads + nbKHeads + nbVHeads;
uint32_t const nbSubSeqPerSeq = computeNbSubSeqPerSeqHopperF8MHA(prop, batchSize, nbKHeads, maxSeqLen);
#if SPEC_DEC
uint32_t const qSeqLen = specDecParams.qSeqLen;
#else
@ -3371,6 +3577,12 @@ void launchHopperF8MHA(cudaDeviceProp const& prop, uint32_t nbKHeads,
#endif
#if SPEC_DEC
specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
skippedBlockCount, totalBlockCount,
#endif
#endif
semaphores, scratch);
#else


@ -1272,6 +1272,19 @@ using is_void = is_same<remove_cv_t<T>, void>;
template <typename T>
inline constexpr bool is_void_v = is_void<T>::value;
#endif
#ifndef GENERATE_CUBIN
template <typename T1, typename T2>
using pair = std::pair<T1, T2>;
#else
template <typename T1, typename T2>
struct pair
{
T1 first;
T2 second;
};
#endif
} // namespace mha
#if GENERATE_CUBIN


@ -50,7 +50,8 @@ using Vector = Matrix<Type, Size, 1>;
template <typename MathElem, uint32_t tileSize, bool isPaged, bool useBeamSearch>
Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAttention(IOHead const* q,
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, float qScale,
float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, float skipSoftmaxThresholdScaleFactor,
uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum)
{
uint32_t const nbTiles = divUp(seqLen, tileSize);
auto gemm1Acc = Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor>::Zero().eval();
@ -61,6 +62,16 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
float const qkScale = qScale * kvScale / sqrtf(validElemsPerHead);
uint32_t const seqBeg = (seqLen < slidingWinSize ? 0 : seqLen - slidingWinSize);
uint32_t const idxTileBeg = seqBeg / tileSize;
uint32_t const nbSubSeq = (multiBlockNum > 0 && nbTiles >= 2) ? mha::min(nbTiles, multiBlockNum) : 1;
std::vector<Eigen::Vector<float, headGrpSize>> skipRowMaxs(nbSubSeq);
for (uint32_t i = 0; i < nbSubSeq; i++)
{
skipRowMaxs[i].fill(-INFINITY);
}
bool const disableSkipForShortSeq = (seqLen < skipSoftmaxThresholdScaleFactor);
float const skipSoftmaxThreshold = disableSkipForShortSeq ? 0.0f : skipSoftmaxThresholdScaleFactor / seqLen;
for (uint32_t idxTile = idxTileBeg; idxTile < nbTiles; idxTile++)
{
Eigen::Matrix<float, headGrpSize, tileSize, Eigen::RowMajor> gemm0Acc;
@ -88,7 +99,22 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
}
}
Eigen::Vector<float, headGrpSize> const localRowMax = gemm0Acc.rowwise().maxCoeff().eval();
Eigen::Vector<float, headGrpSize> const tileRowMax = localRowMax.cwiseMax(rowMax).eval();
auto const prevSkipRowMax = skipRowMaxs[idxTile % nbSubSeq];
skipRowMaxs[idxTile % nbSubSeq] = localRowMax.cwiseMax(skipRowMaxs[idxTile % nbSubSeq]).eval();
if (!disableSkipForShortSeq && skipSoftmaxThreshold > 0)
{
*totalBlockCount += 1;
auto const skipSoftmaxMask = ((localRowMax - prevSkipRowMax).array() < std::log(skipSoftmaxThreshold));
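// Never skip a sub-sequence's first tile: this mirrors the kernel, where maybeSkip requires idxIter != 0.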
bool const skipBlock = skipSoftmaxMask.all() && ((idxTile - idxTileBeg) >= nbSubSeq);
if (skipBlock)
{
*skippedBlockCount += 1;
continue;
}
}
Eigen::Matrix<float, headGrpSize, tileSize, Eigen::RowMajor> tileX
= (gemm0Acc.colwise() - tileRowMax).array().exp().eval();
@ -138,7 +164,8 @@ Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAt
template Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> \
refFlashAttention<prec, tileSize, isPaged, useBeamSearch>(IOHead const* q, \
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, \
float qScale, float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, \
float skipSoftmaxThreshold, uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum)
INSTANTIATE_refFlashAttention(CacheElem, 64, false, false);
INSTANTIATE_refFlashAttention(CacheElem, 64, false, true);


@ -88,7 +88,8 @@ struct CacheSeq<true, true>
template <typename MathElem, uint32_t tileSize, bool isPaged, bool useBeamSearch>
Eigen::Matrix<float, headGrpSize, validElemsPerHead, Eigen::RowMajor> refFlashAttention(IOHead const* q,
CacheSeq<isPaged, useBeamSearch> const& k, CacheSeq<isPaged, useBeamSearch> const& v, uint32_t seqLen, float qScale,
float kvScale, float xScale, uint32_t slidingWinSize, float* attentionSinks, float skipSoftmaxThresholdScaleFactor,
uint32_t* skippedBlockCount, uint32_t* totalBlockCount, uint32_t multiBlockNum);
template <typename MathElem, bool isPaged, bool useBeamSearch>
#if SPEC_DEC


@ -150,7 +150,8 @@ template <uint32_t nbKHeads>
#endif
#endif
void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck, bool verbose = false,
bool saveData = false, bool hasAttentionSinks = false, uint32_t ctxLen = ~0U, uint32_t slidingWinSize = 1U << 30,
float skipSoftmaxThresholdScaleFactor = 0.0f)
{
#if IS_MLA
if (nbKHeads != 1)
@ -224,6 +225,12 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
seqLen = (16U << 20) / gmemCacheHeadBytes; // 32MB per K+V head.
}
ctxLen = std::min(ctxLen, seqLen);
uint32_t skippedBlockCount = 0;
uint32_t totalBlockCount = 0;
if (skipSoftmaxThresholdScaleFactor > 0)
{
assert(useQGMMA);
}
float const kScale = cacheElemSize == 2 ? 1.f : 1 / 4.f;
float const vScale = kScale;
float const qScale = 1.f;
@ -329,6 +336,17 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
auto const rcpOutScale = ManagedMemBuf<float>(1);
auto const seqLenList = ManagedMemBuf<uint32_t[beamWidth]>(batchSize);
auto const ctxLenList = ManagedMemBuf<uint32_t[beamWidth]>(batchSize);
#if SKIP_SOFTMAX_ATTN
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
auto const kernelSkippedBlockCount = ManagedMemBuf<uint32_t>(1);
auto const kernelTotalBlockCount = ManagedMemBuf<uint32_t>(1);
kernelSkippedBlockCount[0] = 0;
kernelTotalBlockCount[0] = 0;
#endif
#else
EXPECT_EQ(skipSoftmaxThresholdScaleFactor, 0.0f)
<< "Got non-zero skipSoftmaxThresholdScaleFactor while SKIP_SOFTMAX_ATTN is not enabled.";
#endif
#if USE_PAGED_KV_CACHE
auto const pageListBuf = ManagedMemBuf<std::byte>(pageListBytes);
#if PAGED_KV_CACHE_LAYOUT == 1
@ -726,6 +744,11 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
maxSeqLen, &seqLenList[0][0], batchSize, kvCacheScale.get(), semaphores.get(), scratch, stream);
};
#else
auto multiBlockNum = [&]()
{
auto const calcFunc = useQGMMA ? &computeNbSubSeqPerSeqHopperF8MHA : &computeNbSubSeqPerSeqMHA;
return calcFunc(prop, batchSize, nbKHeads, maxSeqLen);
}();
auto runKernel = [&]()
{
auto const launchFunc = useQGMMA ? &launchHopperF8MHA : &launchMHA;
@ -776,6 +799,12 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
batchSize, kvCacheScale.get(),
#if SPEC_DEC
specDecParams,
#endif
#if SKIP_SOFTMAX_ATTN
skipSoftmaxThresholdScaleFactor,
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
kernelSkippedBlockCount.get(), kernelTotalBlockCount.get(),
#endif
#endif
semaphores.get(), scratch, stream);
checkCuda(cudaGetLastError());
@ -813,6 +842,10 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
checkCuda(cudaEventRecord(toc, stream));
prefetchToDevice(cudaCpuDeviceId);
checkCuda(cudaStreamSynchronize(stream));
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
kernelSkippedBlockCount[0] /= nbIters;
kernelTotalBlockCount[0] /= nbIters;
#endif
if (testPerf)
{
float ms;
@ -849,6 +882,15 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
= totalNbCacheLoadBytes + inputBytes + outputBytes; // we ignore page indices and beam search indices.
float const dramSolTime = totalTraffic / bandwidth * 1E3f;
float const dramSolRatio = dramSolTime / ms;
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
size_t const totalNbCacheLoadWithSkip = gmemCacheHeadBytes
* (nbKHeads + nbVHeads * (1 - 1.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]))
* nbLoadedCacheTokens;
float const totalTrafficWithSkip
= totalNbCacheLoadWithSkip + inputBytes + outputBytes; // we ignore page indices and beam search indices.
float const dramSolTimeWithSkip = totalTrafficWithSkip / bandwidth * 1E3f;
float const dramSolRatioWithSkip = dramSolTimeWithSkip / ms;
#endif
if (verbose)
{
printf("done\n");
@ -863,7 +905,13 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
}
float const tops = headGrpSize * qSeqLen * float(seqLen) * (validElemsPerKHead + validElemsPerVHead) * 2
* nbKHeads * batchSize / (ms * 1E-3F) * 1E-12F;
#if SKIP_SOFTMAX_ATTN && SKIP_SOFTMAX_ATTN_BLOCK_STATS
printf("kernel skippedBlockCount: %d/%d (%.2f%%)\n", kernelSkippedBlockCount[0], kernelTotalBlockCount[0],
kernelTotalBlockCount[0] == 0 ? 0.0f : 100.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]);
printf("dramSolRatioWithSkip: %f%% (%f ms, TOPS = %f)\n", dramSolRatioWithSkip * 100, ms, tops);
#else
printf("dramSolRatio: %f%% (%f ms, TOPS = %f)\n", dramSolRatio * 100, ms, tops);
#endif
}
if (refCheck)
{
@ -1084,8 +1132,8 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
if (useQGMMA)
{
refOutput = refFlashAttention<CacheElem, 64>(&qHeads[req][b][headGrpSize * idxKHead], kCacheSeq,
vCacheSeq, seqLen, qScaleForRef, kvCacheScale[0], xScale, slidingWinSize, refAttentionSinks,
skipSoftmaxThresholdScaleFactor, &skippedBlockCount, &totalBlockCount, multiBlockNum);
// refOutput = refAttention<CacheElem>(&qHeads[req][b][headGrpSize * idxKHead], kCacheSeq,
// vCacheSeq, seqLen, qScaleForRef, kvCacheScale[0], xScale, slidingWinSize);
}
@ -1132,6 +1180,14 @@ void runTest(uint32_t batchSize, uint32_t seqLen, bool testPerf, bool refCheck,
#endif
}
}
#if SKIP_SOFTMAX_ATTN
printf("host skippedBlockCount: %d/%d (%.2f%%)\n", skippedBlockCount, totalBlockCount,
totalBlockCount == 0 ? 0.0f : 100.0f * skippedBlockCount / totalBlockCount);
#if SKIP_SOFTMAX_ATTN_BLOCK_STATS
printf("kernel skippedBlockCount: %d/%d (%.2f%%)\n", kernelSkippedBlockCount[0], kernelTotalBlockCount[0],
kernelTotalBlockCount[0] == 0 ? 0.0f : 100.0f * kernelSkippedBlockCount[0] / kernelTotalBlockCount[0]);
#endif
#endif
if (saveData)
{
fout_refOutput.close();
@ -1253,6 +1309,14 @@ TEST(RefCheck, llama_V2_70b)
#if SLIDING_WINDOW
runTest<2>(2, 4096, false, true, false, false, false, ~0, 256);
runTest<2>(2, 400, false, true, false, false, false, ~0U, 256);
#endif
#if SKIP_SOFTMAX_ATTN
runTest<1>(32, 2048, false, true, false, false, false, ~0U, 1U << 30, 0.f);
runTest<4>(32, 1538, false, true, false, false, false, ~0U, 1U << 30, 1280.f);
runTest<2>(32, 4096, false, true, false, false, false, ~0U, 1U << 30, 125.f);
runTest<4>(32, 300, false, true, false, false, false, ~0U, 1U << 30, 80.f);
runTest<4>(32, 500, false, true, false, false, false, ~0U, 1U << 30, 501.0f);
runTest<4>(32, 500, false, true, false, false, false, ~0U, 1U << 30, 500.f);
#endif
runTest<8>(120, 367, false, true);
runTest<8>(1792, 2048, false, true);


@ -298,6 +298,11 @@ bool AttentionOp::convertMMHAParamsToXQAParams(tensorrt_llm::kernels::XQAParams&
xqaParams.use_sparse_attention = useTllmGenSparseAttention();
// Skip softmax threshold.
xqaParams.skip_softmax_threshold_scale_factor = mSkipSoftmaxThresholdScaleFactorDecode;
#ifdef SKIP_SOFTMAX_STAT
// Statistics of skip-softmax, pointers of device memory for output
xqaParams.skip_softmax_total_blocks = mSkipSoftmaxTotalBlocks;
xqaParams.skip_softmax_skipped_blocks = mSkipSoftmaxSkippedBlocks;
#endif
// Cross attention parameters.
xqaParams.encoder_input_lengths = generationsParams.encoder_input_lengths;


@ -105,7 +105,8 @@ CubinObj CompileEngine::compile() const
// scratch in this case.
/*use_input_kv=*/applyRoPEInXqaKernel,
/*rope_style=*/ropeStyle,
/*is_spec_dec_tree=*/mXqaParams.is_spec_dec_tree,
/*use_skip_softmax_attn=*/mXqaParams.skip_softmax_threshold_scale_factor != 0};
if (context.kernel_type == TLLM_XQA_JIT_MLA)
{
auto const& c = context;


@ -232,6 +232,7 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
jit::CubinObj const* const cubinObj = mResource->getCubinObjRegistry()->getCubin(key);
TLLM_CHECK(cubinObj != nullptr && cubinObj->isInitialized());
bool const isSpecDec = xqaParams.multi_query_tokens;
bool const isSkipSoftmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
bool const isHMMAKernel = (cubinObj->getKernelType() == XQAKernelType::kAMPERE_WARP_SPECIALIZED);
bool const isGMMAKernel = (cubinObj->getKernelType() == XQAKernelType::kHOPPER_WARP_SPECIALIZED);
bool const isMLAKernel = (cubinObj->getKernelType() == XQAKernelType::kSM120_MLA);
@ -378,7 +379,7 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
.mask = reinterpret_cast<SpecDecParams::MaskType const*>(xqaParams.spec_decoding_packed_mask)};
};
constexpr uint32_t kMAX_NB_KERNEL_PARAMS = 19;
uint32_t idxNextParam = 0;
void* kernelParams[kMAX_NB_KERNEL_PARAMS];
auto appendParam = [&](auto* p) mutable
@ -514,6 +515,16 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
appendParam(&specDecParams);
specDecBlocks = divUp(specDecParams.qSeqLen, 64 / num_q_heads_over_kv);
}
if (isSkipSoftmax)
{
TLLM_CHECK_WITH_INFO(isGMMAKernel, "skip softmax is only supported for GMMA kernel for now.");
TLLM_CHECK_WITH_INFO(!isSpecDec, "skip softmax is not supported with spec dec for now.");
appendParam(&xqaParams.skip_softmax_threshold_scale_factor);
#ifdef SKIP_SOFTMAX_STAT
appendParam(&xqaParams.skip_softmax_total_blocks);
appendParam(&xqaParams.skip_softmax_skipped_blocks);
#endif
}
appendParam(&launchParams.semaphores);
appendParam(&launchParams.scratch);
kernelParams[idxNextParam] = nullptr; // one extra nullptr at end as guard.


@ -96,10 +96,16 @@ bool supportConfigQGMMA(XQAParams const& xqaParams, int SM, bool forConfigurePlu
{
return false;
}
if (!contains({DATA_TYPE_FP16, DATA_TYPE_BF16, DATA_TYPE_E4M3}, xqaParams.kv_cache_data_type))
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (!is_skip_softmax && xqaParams.kv_cache_data_type != DATA_TYPE_E4M3)
{
// fp16/bf16 KV cache is supported by the Hopper kernel only when skip softmax is enabled.
return false;
}
if (xqaParams.beam_width != 1)
{
return false;
@ -168,6 +174,11 @@ bool supportConfigHMMA(XQAParams const& xqaParams, int SM, bool forConfigurePlug
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (is_skip_softmax)
{
return false;
}
return true;
}
@ -201,6 +212,11 @@ bool supportConfigMLA(XQAParams const& xqaParams, int SM, bool forConfigurePlugi
{
return false;
}
bool const is_skip_softmax = xqaParams.skip_softmax_threshold_scale_factor != 0;
if (is_skip_softmax)
{
return false;
}
return true;
}


@ -66,6 +66,7 @@ extern "C"
bool is_spec_dec_tree
= true; // useful only when multi_query_tokens, should be true unless using linear tree in spec-dec.
bool use_skip_softmax_attn;
} tllmXqaJitContext;
// tllmXqaJitProgram is an opaque handle for a program.


@ -215,6 +215,10 @@ tllmXqaJitStatus getMacroFlags(tllmXqaJitContext const* context, std::vector<std
macros["USE_INPUT_KV"] = context->use_input_kv ? "1" : "0";
macros["ROPE_STYLE"] = std::to_string(int(context->rope_style));
macros["IS_SPEC_DEC_TREE"] = context->is_spec_dec_tree ? "1" : "0";
macros["SKIP_SOFTMAX_ATTN"] = context->use_skip_softmax_attn ? "1" : "0";
#ifdef SKIP_SOFTMAX_STAT
macros["SKIP_SOFTMAX_ATTN_BLOCK_STATS"] = context->use_skip_softmax_attn ? "1" : "0";
#endif
// Without these macros, NVRTC uses precompiled headers for cuda_fp16.h etc.
// Linking might fail due to ABI incompatibility.


@ -493,6 +493,10 @@ bool DecoderXQAImplPrecompiled::shouldUse(XQAParams const& xqaParams, bool forCo
{
SUPPORT_RETURN_FALSE("streaming-llm");
}
if (xqaParams.skip_softmax_threshold_scale_factor != 0)
{
SUPPORT_RETURN_FALSE("skip_softmax_threshold_scale_factor");
}
// OPTIMIZE: For the standard generation-phase MHA, there are still extra limitations.
// NOTE: Medusa mode = Multi_query_tokens > 1.


@ -64,6 +64,21 @@ CUtensorMapSwizzle getSwizzleMode(uint32_t partBytes)
}
};
CUtensorMapDataType_enum getDataTypeFromXqaParams(XQAParams const& xqaParams)
{
if (xqaParams.kv_cache_data_type == DATA_TYPE_BF16)
{
return CU_TENSOR_MAP_DATA_TYPE_BFLOAT16;
}
else if (xqaParams.kv_cache_data_type == DATA_TYPE_FP16)
{
return CU_TENSOR_MAP_DATA_TYPE_FLOAT16;
}
TLLM_CHECK(xqaParams.kv_cache_data_type == DATA_TYPE_E4M3 || xqaParams.kv_cache_data_type == DATA_TYPE_E5M2
|| xqaParams.kv_cache_data_type == DATA_TYPE_INT8);
return CU_TENSOR_MAP_DATA_TYPE_UINT8;
}
CUtensorMap makeTensorMapForQ(std::shared_ptr<CUDADriverWrapper> const& driver, void const* addr,
CUtensorMapDataType_enum dataType, uint32_t headElems, uint32_t totalNbHeads, uint32_t partElems, uint32_t boxHeads)
{
@ -131,24 +146,26 @@ CUtensorMap makeTensorMapForHopperXqaKVCache(
if constexpr (std::is_same_v<KVCacheBuffer, KVBlockArray>)
{
uint32_t const headElems = xqaParams.head_size;
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const elemBytes = getElemBytes(dataType);
TLLM_CHECK(headElems <= 256);
uint32_t const paddedHeadElems = headElems <= 64 ? 64 : (headElems <= 128 ? 128 : 256);
uint32_t const partElems = std::min(elemBytes * paddedHeadElems, 128U) / elemBytes;
return makeTensorMapForPagedKVCache(driver, kv_cache_buffer.mPrimaryPoolPtr, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.tokens_per_block, partElems);
}
else
{
static_assert(std::is_same_v<KVCacheBuffer, KVLinearBuffer>);
uint32_t const headElems = xqaParams.head_size;
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const elemBytes = getElemBytes(dataType);
TLLM_CHECK(headElems <= 256);
uint32_t const paddedHeadElems = headElems <= 64 ? 64 : (headElems <= 128 ? 128 : 256);
uint32_t const partElems = std::min(elemBytes * paddedHeadElems, 128U) / elemBytes;
return makeTensorMapForContiguousKVCache(driver, kv_cache_buffer.data, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.max_attention_window_size, xqaParams.beam_width, xqaParams.batch_size,
partElems);
}
}
@ -161,11 +178,12 @@ template <typename KVCacheBuffer>
CUtensorMap makeTensorMapForXqaMlaKVCache(std::shared_ptr<tensorrt_llm::common::CUDADriverWrapper> const& driver,
XQAParams const& xqaParams, KVCacheBuffer const& kv_cache_buffer, bool forK)
{
CUtensorMapDataType_enum const dataType = getDataTypeFromXqaParams(xqaParams);
uint32_t const partElems = (forK ? 64 : 128);
if constexpr (std::is_same_v<KVCacheBuffer, KVBlockArray>)
{
return makeTensorMapForPagedKVCache(driver, kv_cache_buffer.mPrimaryPoolPtr, dataType, xqaParams.head_size,
xqaParams.num_kv_heads, xqaParams.tokens_per_block, partElems);
}
else
{
@ -183,7 +201,7 @@ CUtensorMap makeTensorMapForXqaMlaQ(
std::shared_ptr<tensorrt_llm::common::CUDADriverWrapper> const& driver, XQAParams const& xqaParams, void const* q)
{
uint32_t const partElems = 64;
return makeTensorMapForQ(driver, q, getDataTypeFromXqaParams(xqaParams), xqaParams.head_size,
xqaParams.num_q_heads * xqaParams.total_num_input_tokens, partElems, xqaParams.num_q_heads);
}
} // namespace kernels


@ -119,7 +119,12 @@ struct XQAParams
bool use_sparse_attention = false;
// Skip softmax threshold.
float skip_softmax_threshold_scale_factor = 0.0f;
#ifdef SKIP_SOFTMAX_STAT
uint32_t* skip_softmax_total_blocks = nullptr;
uint32_t* skip_softmax_skipped_blocks = nullptr;
#endif
cudaStream_t stream = 0;
// layer index
@ -199,6 +204,10 @@ struct XQAParams
<< "sparse_params: " << sparse_params.toString() << std::endl
<< "use_sparse_attention :" << (use_sparse_attention ? "true" : "false") << std ::endl
<< "skip_softmax_threshold_scale_factor :" << skip_softmax_threshold_scale_factor << std ::endl
#ifdef SKIP_SOFTMAX_STAT
<< "skip_softmax_total_blocks :" << skip_softmax_total_blocks << std ::endl
<< "skip_softmax_skipped_blocks :" << skip_softmax_skipped_blocks << std ::endl
#endif
<< "stream :" << stream;
return ss.str();


@ -0,0 +1,423 @@
# Optimizing DeepSeek-V3.2 on NVIDIA Blackwell GPUs
By NVIDIA TensorRT LLM team
## Table of Contents
- [Optimizing DeepSeek-V3.2 on NVIDIA Blackwell GPUs](#optimizing-deepseek-v32-on-nvidia-blackwell-gpus)
- [Table of Contents](#table-of-contents)
- [Introduction](#introduction)
- [DeepSeek Sparse Attention (DSA)](#deepseek-sparse-attention-dsa)
- [Precision Strategy](#precision-strategy)
- [Parallel Strategy](#parallel-strategy)
- [Key Features](#key-features)
- [MTP](#mtp)
- [Disaggregated Serving](#disaggregated-serving)
- [Chunked Prefill and KV Cache Reuse](#chunked-prefill-and-kv-cache-reuse)
- [Wide Expert Parallelism (Wide-EP)](#wide-expert-parallelism-wide-ep)
- [Chat Template and Tool Parser](#chat-template-and-tool-parser)
- [Key Optimizations](#key-optimizations)
- [Kernel Optimizations](#kernel-optimizations)
- [Sparse MLA Kernel](#sparse-mla-kernel)
- [Indexer Top-K Kernel](#indexer-top-k-kernel)
- [DeepGEMM MQA Kernel](#deepgemm-mqa-kernel)
- [Kernel Fusion](#kernel-fusion)
- [System Optimizations](#system-optimizations)
- [Multi-streams](#multi-steams)
- [A Fast Path for Short Sequences](#a-fast-path-for-short-sequences)
- [How to Reproduce](#how-to-reproduce)
- [Accuracy Evaluation](#accuracy-evaluation)
- [Benchmark on B200](#benchmark-on-b200)
- [Min-latency](#min-latency)
- [Max-throughput](#max-throughput)
- [Benchmark with Wide-EP on GB200](#benchmark-with-wide-ep-on-gb200)
- [Future Works](#future-works)
- [Acknowledgement](#acknowledgement)
## Introduction
The open-source [DeepSeek-V3.2](https://api-docs.deepseek.com/news/news251201) series models propose a new architecture with a fine-grained sparse attention mechanism called DeepSeek Sparse Attention (DSA). It helps the DeepSeek-V3.2 model achieve better efficiency, especially in long-sequence scenarios. Although DSA uses a lightweight indexer for prediction, realizing an actual speedup from attention sparsity is still challenging. This blog introduces how TensorRT LLM supports key LLM inference features for DeepSeek-V3.2 and optimizes its performance on NVIDIA Blackwell GPUs.
## DeepSeek Sparse Attention (DSA)
DSA serves as a core component of the DeepSeek-V3.2 model, and it is the only architectural modification compared to its predecessors (DeepSeek-V3/R1/V3.1). It is a fine-grained sparse attention mechanism that selects only the important key-value entries for attention computation.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_dsa_architecture.png" alt="tech_blog15_dsa_architecture" width="700" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 1. The architecture of DSA. The green part illustrates how DSA selects the Top-K key-value entries according to the indexer.</em></sub></p>
Figure 1 illustrates the overall architecture: a lightning indexer first determines the importance of all key-value entries for each query token. Subsequently, the Top-K Selector retains only the top-$k$ entries (typically $k=2048$) based on the index scores. Finally, attention is computed exclusively between the query token and these selected entries.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_indexer_topk.png" alt="tech_blog15_indexer_topk" width="900" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 2. The architecture of the DSA indexer and Top-K logics.</em></sub></p>
Figure 2 illustrates the DSA indexer and the Top-K selection mechanism. First, two low-rank linear layers project $c_t^Q$ and the input $h_t$ into lower-dimensional tensors. After applying LayerNorm to the K tensor and RoPE to both Q and K, we obtain the tensors $Q_t^I$ and $K_t^I$. Simultaneously, a separate weight projection layer processes $h_t$ to generate the weights $W_t^I$. These tensors are then used to compute the index scores (labeled as MQA Logits in Figure 2):
$$I_{t} = \sum_{j=1}^{h}W_j^I \cdot \text{ReLU}(Q_{t, j}^I (K_t^I)^T)$$
Finally, a Top-K operation is applied to the index scores to identify the most relevant indices, which are subsequently used for the sparse MLA computation. To reduce computational overhead, the K tensor $K_t^I$ is stored in the indexer K cache, allowing for reuse in subsequent iterations.
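For intuition, the scoring and selection steps above can be sketched in a few lines of NumPy. Names, shapes, and the dense cache layout below are illustrative only, not the TensorRT LLM implementation:

```python
import numpy as np

def dsa_select_topk(q_idx, k_idx, w_idx, k=2048):
    """Toy version of the DSA indexer: score all cached keys for one
    query token, then keep the indices of the top-k scores.

    q_idx: [h, d]  per-head indexer queries Q_t^I
    k_idx: [n, d]  cached indexer keys K^I (n past tokens)
    w_idx: [h]     per-head weights W_t^I
    """
    # MQA logits: ReLU(Q K^T), then a weighted sum over the h indexer heads.
    logits = np.maximum(q_idx @ k_idx.T, 0.0)        # [h, n]
    scores = (w_idx[:, None] * logits).sum(axis=0)   # [n]
    # Indices of the top-k scores; only these KV rows enter sparse MLA.
    k = min(k, scores.shape[0])
    return np.argpartition(scores, -k)[-k:]

# Example: 4 indexer heads, dim 16, 4096 cached tokens.
rng = np.random.default_rng(0)
topk = dsa_select_topk(rng.normal(size=(4, 16)),
                       rng.normal(size=(4096, 16)),
                       rng.normal(size=4))
print(topk.shape)  # (2048,)
```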
Regarding implementation, DSA diverges from the MLA used in DeepSeek-V3/R1/V3.1 models, which alternates between MHA mode (prefill) and MQA mode (decoding) as discussed in [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md). Instead, our current DSA implementation operates only in MQA mode for both prefill and decoding phases to maximize kernel efficiency. We are continuing to explore further optimizations, including potential support for MHA mode in future iterations.
The DSA implementation is built upon the TensorRT LLM sparse attention framework, which is designed to provide flexible and extensible support for various sparse attention methods. For more information, please refer to the [sparse attention documentation](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/features/sparse-attention.md), and a technical blog providing further details will be released soon.
## Precision Strategy
Because DSA is the only architectural change in DeepSeek-V3.2 relative to DeepSeek-R1, the mixed-precision recipe for the other modules is the same as that used for DeepSeek-R1. The NVFP4 precision strategy used in the DSA module is:
- Indexer
- Low-rank linear layers: BF16
- Weight projection layer: FP32, for model accuracy
- MQA:
- Indexer K cache: Blockwise FP8
- Math: Blockwise FP8
- Top-K: FP32
- QKV projection layer: BF16
- Output projection layer: NVFP4
- Sparse MLA
- KV cache: Per-tensor FP8
- Math: Per-tensor FP8
The MoE layers use NVFP4, the same as in DeepSeek-R1. Please refer to [Tech Blog 1](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md) and [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md) for the MoE precision strategy. In addition to the NVFP4 version of DeepSeek-V3.2, TensorRT-LLM also supports the original FP8 model, as well as both BF16 and per-tensor FP8 KV caches.
We evaluated the accuracy of this NVFP4 checkpoint on the following datasets:
| | GSM8k | MMLU | GPQA-Diamond |
| :----------------- | :---- | :---- | :----------- |
| [deepseek-ai/DeepSeek-V3.2](https://huggingface.co/deepseek-ai/DeepSeek-V3.2) | 95.91 | 87.84 | 84.34 |
| nvidia/DeepSeek-V3.2-NVFP4<sup>*</sup> | 95.26 | 87.54 | 84.85 |
<sub><em>\* Currently, the NVFP4 checkpoint has not yet been published on Hugging Face. Please stay tuned, or refer to the [How to reproduce](#how-to-reproduce) section to learn how to quantize the model to NVFP4.
** Note that there is some run-to-run variance in these evaluations. Our experiments indicate that the NVFP4 recipe delivers accuracy on par with FP8 on these datasets.</em></sub>
## Parallel Strategy
To achieve optimal throughput, DeepSeek-V3.2 adopts the same parallel strategy as DeepSeek-R1. Please refer to [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md) for a detailed explanation of the performance benefits:
| Components | Parallel Patterns |
| :----------------- | :-------------------------- |
| Attention Modules | Data Parallelism 8 (DP8) |
| MoE Sparse Experts | Expert Parallelism 8 (EP8) |
| MoE Shared Experts | DP8 |
| Router GEMM | DP8 |
To scale DeepSeek-V3.2 inference on high-performance systems such as the GB200 NVL72, the model also leverages the parallel strategy from DeepSeek-R1. Please refer to [Tech Blog 4](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md), [Tech Blog 8](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md), and [Tech Blog 14](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.md) for more details.
The difference lies in the DSA indexer. When utilizing Tensor Parallelism (TP) for attention modules, typically in latency-oriented scenarios, TP is not applied to the indexer layers. Instead, it is applied exclusively to the MLA components (i.e., the remaining layers of the attention module).
## Key Features
In TensorRT LLM, there are many advanced features that are crucial for maximizing LLM inference performance, such as CUDA Graph, Overlap Scheduler, Speculative Decoding, etc. Given the architectural innovations in DeepSeek-V3.2, ensuring its compatibility with these features is important.
As illustrated in [Tech Blog 3](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md), both CUDA Graph and the Overlap Scheduler offer significant throughput improvements. For CUDA Graph support, which is typically enabled during decoding-only iterations where all requests are in the decoding phase, we must ensure that kernels in the DSA module support graph capture and that input/output tensor shapes remain consistent for a given batch size. Regarding the Overlap Scheduler, it is critical to eliminate any CPU-GPU synchronization within the DSA forward, as this would disrupt the execution pipeline. Other key features are discussed in the following subsections.
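As a concrete illustration of the static-shape constraint, here is a minimal PyTorch CUDA Graph sketch (a generic example, unrelated to the actual DSA kernels): a captured region can only be replayed if every tensor keeps the same shape and address, which is why decode batches are padded to fixed sizes.

```python
import torch

# Hypothetical decode-step work; shapes must be identical on every replay.
x = torch.randn(16, 1024, device="cuda")   # fixed batch size 16
w = torch.randn(1024, 1024, device="cuda")
out = torch.empty(16, 1024, device="cuda")

# Warm up on a side stream before capture (required by PyTorch).
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    out.copy_(x @ w)
torch.cuda.current_stream().wait_stream(s)

g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    out.copy_(x @ w)                        # captured kernels

# New inputs are written into the *same* buffers, then the graph replays.
x.copy_(torch.randn(16, 1024, device="cuda"))
g.replay()
```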
### MTP
Multi-Token Prediction (MTP) is a speculative decoding method used in DeepSeek series models. It verifies and accepts multiple draft tokens in a single iteration, significantly improving inference performance in both low-latency and high-throughput scenarios. DeepSeek-V3.2 also supports MTP. For latency-critical scenarios, as detailed in [Tech Blog 1](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md), MTP-3 is recommended to maximize GPU utilization and achieve optimal performance. For other scenarios, MTP-1 typically offers performance gains as well.
However, the decoding indexer MQA kernel supports sequence lengths of only 1 or 2, limiting native support to MTP-off or MTP-1. To enable MTP > 1, we offer two solutions. The long-term solution involves updating the MQA kernel to support larger sequence lengths, which will be introduced in the MQA kernel optimization section. The immediate workaround (in [PR-9045](https://github.com/NVIDIA/TensorRT-LLM/pull/9045)) uses the existing kernel by flattening the sequence length dimension into the batch dimension, treating the input as a tensor with a sequence length of 1. While this approach ignores the causal mask during the indexer MQA forward, causing discrepancies in the diagonal regions compared to ground truth, the subsequent Top-K kernel handles causal masking correctly. Therefore, the final Top-K indices remain unaffected, allowing this workaround to support MTP-N for any N.
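A shape-level sketch of this workaround (hypothetical tensor layout; the real kernel operates on paged caches):

```python
import torch

def flatten_for_mqa(q: torch.Tensor) -> torch.Tensor:
    """Workaround for an indexer MQA kernel that only supports q_seq_len <= 2:
    fold the draft-token dimension into the batch dimension, so each draft
    token is presented as an independent query of sequence length 1.

    q: [batch, q_seq_len, heads, dim], q_seq_len = MTP draft tokens + 1
    """
    b, s, h, d = q.shape
    return q.reshape(b * s, 1, h, d)

q = torch.randn(8, 4, 32, 64)       # MTP-3: 4 query tokens per request
print(flatten_for_mqa(q).shape)     # torch.Size([32, 1, 32, 64])
```

The diagonal scores computed this way ignore the causal mask, but since the subsequent Top-K kernel applies causal masking correctly, the final selected indices are unaffected.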
### Disaggregated Serving
Disaggregated serving decouples the prefill and decoding phases, allowing them to run on separate GPU pools with optimized parallel strategies. This feature is crucial for deploying LLMs on high-performance systems like the GB200 NVL72. However, it requires transferring KV cache blocks from the prefill to the decoding GPUs. DeepSeek-V3.2 introduces an additional 'indexer K cache,' which presents unique challenges for cache management and transmission in a disaggregated setup.
To address this, [PR-8699](https://github.com/NVIDIA/TensorRT-LLM/pull/8699) integrated indexer K cache support into the existing kvCacheManager, enabling it to inherit existing cache features. Subsequently, [PR-8735](https://github.com/NVIDIA/TensorRT-LLM/pull/8735) extended disaggregated serving capabilities to DeepSeek-V3.2, allowing TensorRT LLM to handle the transmission of the indexer K cache. Currently, the implementation specifically targets the indexer K cache, but we plan to generalize this support in future updates.
### Chunked Prefill and KV Cache Reuse
Two additional critical features are chunked prefill and KV cache reuse. Chunked prefill removes input length constraints for long prompts and enables prefill chunks to be batched alongside more decoding requests, boosting throughput. KV cache reuse allows requests sharing common prefixes (e.g., system prompts or multi-turn conversations) to share cached blocks, drastically reducing time-to-first-token (TTFT).
On the implementation side, kvCacheManager already supports the newly introduced indexer K cache, extending compatibility to both chunked prefill and KV cache reuse. Then [PR-9376](https://github.com/NVIDIA/TensorRT-LLM/pull/9376) enabled DSA to perform prefill computation with past tokens saved in the cache, thereby unlocking chunked prefill support. Building on this, [PR-9383](https://github.com/NVIDIA/TensorRT-LLM/pull/9383) implemented KV cache reuse for DeepSeek-V3.2 by reusing the chunked prefill changes.
### Wide Expert Parallelism (Wide-EP)
Wide-EP is an important feature for boosting inference throughput in large-scale Mixture-of-Experts (MoE) models. For the DeepSeek-V3.2 model, after disaggregated serving was supported, [PR-9245](https://github.com/NVIDIA/TensorRT-LLM/pull/9245) simply registered the model with the Expert Parallelism Load Balancer (EPLB). This integration allows Wide-EP and EPLB to be enabled, significantly enhancing performance.
### Chat Template and Tool Parser
DeepSeek-V3.2 introduces a new chat template compared to prior versions. This update incorporates support for tool calling and the 'thinking with tools' capability. These enhancements, along with the necessary tool parser, were implemented in [PR-9814](https://github.com/NVIDIA/TensorRT-LLM/pull/9814) and [PR-10126](https://github.com/NVIDIA/TensorRT-LLM/pull/10126). To enable this new chat template when deploying with `trtllm-serve` or `trtllm-eval`, please specify the argument `--custom_tokenizer deepseek_v32`.
## Key Optimizations
DeepSeek-V3.2 can inherit the MoE optimizations from DeepSeek-R1. Consequently, this section focuses exclusively on the DSA part, covering both kernel and system-level optimizations.
### Kernel Optimizations
#### Sparse MLA Kernel
Sparse MLA serves as the core kernel of DSA, enabling attention computation with fine-grained token sparsity. To efficiently support this sparsity pattern, we leverage the new TMALDG.Gather4 instruction on Blackwell GPUs. This instruction loads four rows from a source 2D tensor and coalesces them into a single destination tensor, making it ideal for fine-grained sparse attention operations.
Similar to the dense MLA kernel, FP8 KV cache optimization is crucial for reducing KV cache size and improving E2E throughput. For DSA, we employ per-tensor FP8 quantization: both Query (Q) and Key-Value (KV) tensors are quantized, and FP8 arithmetic is used for the sparse MLA computation. To validate model accuracy under this configuration, the table below presents the GPQA-Diamond accuracy comparison between BF16 and per-tensor FP8 KV cache for the DeepSeek-V3.2-Exp model. [PR-8692](https://github.com/NVIDIA/TensorRT-LLM/pull/8692) introduced this FP8 sparse MLA support, yielding up to a 47.03% improvement in throughput (TPS/GPU).
| KV Cache Type | FP8 checkpoint | NVFP4 checkpoint |
| :--------------------------- | :------------- | :--------------- |
| BF16 Sparse MLA and KV cache | 80.30 | 79.29 |
| FP8 Sparse MLA and KV cache | 78.28 | 80.30 |
Another important optimization is SwapsMmaAb, designed specifically for Tensor Parallelism (TP) scenarios. When TP is enabled for sparse MLA, input tensors are partitioned along the Q head dimension. Consequently, each rank processes a reduced number of Q heads ($128 / \text{TP}$), leading to Tensor Core underutilization. SwapsMmaAb addresses this bottleneck by swapping the A and B operands during matrix multiplication to improve hardware utilization.
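The swap rests on the identity $AB = (B^T A^T)^T$: when the Q-head dimension is small, computing the transposed product lets the larger dimension occupy the MMA's M extent. A toy NumPy illustration (shapes are illustrative only):

```python
import numpy as np

rng = np.random.default_rng(0)
M, K, N = 16, 576, 2048            # few Q heads (M) vs. many KV entries (N)
A = rng.normal(size=(M, K))        # Q-side operand
B = rng.normal(size=(K, N))        # KV-side operand

direct  = A @ B                    # tall-and-skinny MMA: M = 16 underutilizes
swapped = (B.T @ A.T).T            # same result, larger M extent for the MMA
assert np.allclose(direct, swapped)
```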
#### Indexer Top-K Kernel
DSA contains a module called the Top-K Selector: a fine-grained token selection mechanism that retrieves only the key-value entries corresponding to the Top-K index scores produced by the lightning indexer. This stage selects the top 2048 tokens for each query.
##### Deterministic Top-K vs Non-deterministic Top-K
The Top-K problem aims to find the largest (or smallest) K elements from a set of N candidates. Because some of the N candidates may have identical values, there can be more than K elements tied with the K-th element. In such cases, deciding which of the tied elements are included in the final Top-K set affects whether the output is deterministic. If the tied elements are selected randomly, the results will be nondeterministic. Conversely, if we always prioritize elements with smaller indices, the results will be deterministic.
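For example, a deterministic Top-K can be obtained by breaking ties toward smaller indices, as in this minimal NumPy sketch:

```python
import numpy as np

def topk_deterministic(x, k):
    """Top-k that always prefers the smaller index among tied values."""
    # lexsort treats the last key as primary: sort by descending value,
    # then by ascending index, so ties resolve to the earliest element.
    order = np.lexsort((np.arange(len(x)), -x))
    return np.sort(order[:k])

x = np.array([5.0, 3.0, 5.0, 5.0, 1.0])
print(topk_deterministic(x, 2))  # [0 2]: the tied 5.0s resolve by index
```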
Obtaining deterministic results generally requires a more complex algorithm and incurs higher latency than a nondeterministic version. For DeepSeek-V3.2, we first need to determine whether such determinism is actually necessary. We compared the accuracy of the deterministic (DE) and nondeterministic (non-DE) versions of Top-K on the GPQA-Diamond dataset. The scores are quite close:
| GPQA-Diamond | DE Top-K | Non-DE Top-K |
| :----------- | :------ | :---------- |
| FP8 model | 79.8 | 79.9 |
| NVFP4 model | 80.3 | 79.4 |
So we decided to use the non-DE parallel Top-K algorithm for DeepSeek-V3.2.
##### Radix-select-based Top-K Parallel Algorithm
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_radix_select_topk.png" alt="tech_blog15_radix_select_topk" width="1280" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 3. Radix-select-based Top-K.</em></sub></p>
In general, there are two kinds of parallel Top-K algorithms: partition-based methods and priority-queue-based methods. The runtime of existing priority-queue approaches grows rapidly as K increases, and K is as large as 2048 for the indexer Top-K in DeepSeek-V3.2, so we choose partition-based methods instead. Specifically, we adopt radix-select as our baseline.
For 32-bit values with 8-bit digits, a naïve radix Top-K algorithm runs 4 iterations, with 4 kernel launches per iteration. Each iteration performs:
1. **Histogram**: count how many elements fall into each digit bucket based on the current bits.
2. **Prefix Sum**: build a prefix sum over these bucket counts.
3. **Find target digit**: identify which bucket contains the K-th element.
4. **Filtering**: keep all elements in smaller buckets as definite Top-K, discard elements in larger buckets, and pass elements in the target bucket to the next iteration as new candidates.
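Below is a minimal NumPy sketch of this iteration, written for largest-K selection (for smallest-K the bucket orientation flips). The production kernel runs the four steps as separate GPU kernels on float bit patterns; this sketch uses plain integers:

```python
import numpy as np

def radix_pass(keys, k, shift):
    """One radix-select iteration over one 8-bit digit (largest-K).
    Returns (definite_topk, candidates_for_next_pass, k_remaining)."""
    digits = (keys >> shift) & 0xFF
    hist = np.bincount(digits, minlength=256)   # (1) histogram of digit buckets
    ge = np.cumsum(hist[::-1])[::-1]            # (2) prefix sum: ge[d] = #keys with digit >= d
    t = int(np.max(np.nonzero(ge >= k)[0]))     # (3) bucket holding the K-th largest key
    definite = keys[digits > t]                 # (4) filter: larger buckets are certainly in
    return definite, keys[digits == t], k - len(definite)

# Select the 2048 largest of 9295 random 31-bit keys in 4 iterations (MSB first).
rng = np.random.default_rng(0)
cands, k = rng.integers(0, 2**31, size=9295, dtype=np.int64), 2048
picked = [np.empty(0, dtype=np.int64)]
for shift in (24, 16, 8, 0):
    sure, cands, k = radix_pass(cands, k, shift)
    picked.append(sure)
picked.append(cands[:k])                        # survivors of the last pass are all tied
assert sum(map(len, picked)) == 2048
```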
##### Optimizations for Indexer Top-K
**Skip iterations with parallel sorting.** In addition to the basic radix-select method, we introduce further optimizations to speed up the Top-K computation. In practice, with either 8-bit radix select (four iterations) or 11-bit radix select (three iterations), the number of candidates typically drops sharply after the first one or two iterations on real datasets.
Our key optimization is to bypass the remaining radix-select iterations and switch to a parallel sort once the candidate set becomes sufficiently small (smaller than 2048 in the current implementation). When the number of candidates is relatively small, we use a low-overhead naive O(N²) comparison-based ranking algorithm: for each element, we compare it against all others to determine its final position, and if this position is smaller than K, we keep it as part of the Top-K output. Otherwise, we use the parallel sort from CUB to get the results. The basic implementation and this optimization were added in [PR-8882](https://github.com/NVIDIA/TensorRT-LLM/pull/8882).
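The comparison-based ranking stage can be sketched as follows (a toy sequential version; the real kernel assigns one thread per element, and the index tie-break here is only to make the sketch well-defined):

```python
import numpy as np

def rank_based_topk(vals, k):
    """O(N^2) ranking: an element is kept if fewer than k others beat it.
    'Beat' = strictly larger value, or an equal value at a smaller index."""
    n = len(vals)
    keep = []
    for i in range(n):
        rank = sum(1 for j in range(n)
                   if vals[j] > vals[i] or (vals[j] == vals[i] and j < i))
        if rank < k:
            keep.append(i)
    return np.array(keep)

vals = np.array([0.9, 0.1, 0.9, 0.5, 0.7])
print(rank_based_topk(vals, 3))  # [0 2 4]
```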
**Specialization for different cases.** When running with real datasets, we found that the number of candidates reaching the final sorting stage was larger than expected, which resulted in higher runtime overhead. To address this issue, [PR-9255](https://github.com/NVIDIA/TensorRT-LLM/pull/9255) introduced an additional preliminary bin-distribution step to reduce the number of candidates more efficiently before the final sort. This preprocessing step halves the candidate set and uses the leading 11 bits of each value to compute its bin index.
##### Performance Results
<sub><em>Table 1: Performance comparison of torch.topk and our customized Top-K op on B200.</em></sub>
| File | torch.topk(us) | TopKPerRow(us) | Speedup |
|-------------------------------|----------------|----------------|---------|
| topk_inputs_layer0_rank0.npy | 106.877 | 14.069 | 7.596 |
| topk_inputs_layer0_rank1.npy | 109.501 | 14.217 | 7.702 |
| topk_inputs_layer0_rank2.npy | 104.616 | 14.079 | 7.431 |
| topk_inputs_layer0_rank3.npy | 105.049 | 14.016 | 7.495 |
| topk_inputs_layer0_rank4.npy | 105.526 | 14.073 | 7.498 |
| topk_inputs_layer0_rank5.npy | 105.034 | 13.986 | 7.510 |
| topk_inputs_layer0_rank6.npy | 104.516 | 14.079 | 7.423 |
| topk_inputs_layer0_rank7.npy | 105.099 | 14.189 | 7.407 |
| topk_inputs_layer10_rank0.npy | 109.614 | 15.281 | 7.173 |
| topk_inputs_layer10_rank1.npy | 104.838 | 15.284 | 6.859 |
| Average | 106.067 | 14.327 | 7.410 |
We use data exported from real datasets across different layers. The input tensor size for each case is [64, 9295]. We select the top 2048 from the valid candidates for each query. As shown in Table 1, compared to the native torch.topk implementation, our implementation achieves an average speedup of 7.41x, significantly reducing the latency of the indexer module.
Overall, replacing the DE Top-K from PyTorch with our customized non-DE Top-K kernel brings 25%~40% and 14%~24% E2E speedups for the low-latency and throughput scenarios, respectively.
#### DeepGEMM MQA Kernel
The DeepGEMM MQA kernel computes logits for the Top-K selection process. To enhance efficiency on Blackwell GPUs, several optimizations were implemented targeting both performance and ease of use:
- Larger MMA Tile Size: We increased the MMA tile size for both the prefill and decoding MQA kernels, yielding up to a 10% performance improvement. This optimization was implemented in commit [2f9d878](https://github.com/deepseek-ai/DeepGEMM/commit/2f9d87877ed691a62796c25f2e9496a5e0b7123a) and [fc97232](https://github.com/deepseek-ai/DeepGEMM/commit/fc97232c6f23bf5b4be5bdef52af8ce5dc499460).
- Flexible Paged KV Cache Configurations: The decoding MQA kernel now supports a wider range of configurations. While the initial version was restricted to a block size of 64 tokens, commit [c5d4d74](https://github.com/deepseek-ai/DeepGEMM/commit/c5d4d7448665ae90a81d9d31d60d445010da50f0) enabled support for any block size $B$ satisfying $64 \bmod B = 0$.
- MTP-3 Support: Previously, the kernel was limited to MTP-0 or MTP-1 (predicting at most one draft token). Since MTP-3 typically delivers superior performance in low-latency scenarios, optimizations were introduced (see commit [2be3f36](https://github.com/deepseek-ai/DeepGEMM/commit/2be3f367854702e3887ff5b28b274cb16b441af9)) to enable native MTP-3 support.
#### Kernel Fusion
Kernel fusion is a standard optimization technique for improving performance. For DeepSeek-V3.2, we implemented specific fusion strategies:
- Custom Kernels for Indexer K Cache Population: The indexer MQA utilizes blockwise FP8 for both Q and K inputs, requiring the indexer K cache to store data in a specific blockwise FP8 format. During the forward pass, the indexer K tensor must be quantized, and both the values and scaling factors are saved to the cache. To optimize this, [PR-8701](https://github.com/NVIDIA/TensorRT-LLM/pull/8701) fused the blockwise FP8 quantization logic into a single kernel. Since the original PyTorch operations were a bottleneck, this resulted in a significant 32.64%–64.20% improvement in inference throughput. Subsequently, [PR-8960](https://github.com/NVIDIA/TensorRT-LLM/pull/8960) fused the indexer K tensor storage operations into a custom kernel, delivering an additional 3.5%–13.4% end-to-end (E2E) performance gain. (A sketch of the blockwise quantization layout appears after this list.)
- Fusing Small Kernels via torch.compile(): Beyond the major kernels, DSA involves numerous small kernels with low latencies. To reduce kernel launch overhead, we leverage torch.compile() to fuse these smaller operations:
- [PR-8988](https://github.com/NVIDIA/TensorRT-LLM/pull/8988) consolidated indexer weight scaling for blockwise FP8 quantization.
- [PR-9052](https://github.com/NVIDIA/TensorRT-LLM/pull/9052) fused LayerNorm operations, yielding around 1.42% speedup for low-latency scenarios and 1.90% for throughput-oriented scenarios.
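Here is a minimal PyTorch sketch of the blockwise FP8 layout referenced above: one scale per block of channels, values stored as `float8_e4m3fn`. This shows the math only; the fused production kernel additionally writes values and scales straight into the paged indexer K cache, and the block size here is illustrative.

```python
import torch

FP8_MAX = 448.0  # max finite value of torch.float8_e4m3fn

def quantize_blockwise_fp8(k: torch.Tensor, block: int = 128):
    """Toy blockwise FP8 quantization for an indexer K tensor.

    k: [tokens, dim] with dim divisible by `block`.
    Returns (fp8 values [tokens, dim], scales [tokens, dim // block]).
    """
    n, d = k.shape
    blocks = k.view(n, d // block, block)
    # One scale per block, chosen so the largest magnitude maps to FP8_MAX.
    scale = blocks.abs().amax(dim=-1, keepdim=True).clamp(min=1e-4) / FP8_MAX
    q = (blocks / scale).to(torch.float8_e4m3fn)
    return q.view(n, d), scale.squeeze(-1)

k = torch.randn(4, 256)
q, s = quantize_blockwise_fp8(k)
print(q.dtype, q.shape, s.shape)  # torch.float8_e4m3fn [4, 256] [4, 2]
```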
### System Optimizations
#### Multi-streams
Multi-stream execution is leveraged in the following optimizations:
- [PR-8988](https://github.com/NVIDIA/TensorRT-LLM/pull/8988) employs multi-stream to overlap indexer weight scaling with the indexer K cache update. Combined with torch.compile() optimization for the indexer weight scaling, this yields approximately 2.53% speedup in low-latency scenarios.
- When improving the blockwise FP8 quantization in [PR-8701](https://github.com/NVIDIA/TensorRT-LLM/pull/8701), multi-stream is also used to enable concurrent quantization of the indexer Q and K tensors.
- [PR-9243](https://github.com/NVIDIA/TensorRT-LLM/pull/9243) changed the indexer weight projection GEMM to FP32 to improve accuracy. However, this introduced a performance regression compared to the low-precision implementation. To mitigate this, multi-stream is utilized to overlap the FP32 weight projection GEMM with the indexer low-rank Q projection GEMM, LayerNorm, and Q/K RoPE operations.
#### A Fast Path for Short Sequences
DeepSeek-V3.2 employs K=2048 for the Top-K selector. For sequences with length $N \le 2048$, all past KV tokens are inherently selected, rendering the MQA and Top-K operations redundant. [PR-9524](https://github.com/NVIDIA/TensorRT-LLM/pull/9524) implements a "fast path" to bypass these unnecessary operations for short sequences.
For the implementation, we simply generate dense indices during DSA preparation and use them directly in the indexer forward pass for prefill requests. However, decoding requests present a challenge due to CUDA Graph integration, since CUDA graphs are usually enabled for decoding-only iterations. To ensure compatibility, we capture separate CUDA graphs for short and long sequences. At the start of each iteration, the system checks the sequence lengths: if any request in the batch exceeds the threshold, the long-sequence graph is triggered; otherwise, the short-sequence graph is used. This optimization yields approximately 1.03x speedup for 1K/1K scenarios.
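A sketch of the fast-path decision (hypothetical helper names; the production code selects between two separately captured CUDA graphs):

```python
import torch

TOPK = 2048

def dense_indices_or_none(seq_lens: torch.Tensor):
    """Fast path for short sequences: when every request's KV length is
    <= TOPK, the Top-K selector would pick all past tokens anyway, so we
    emit dense indices directly and skip the indexer MQA + Top-K kernels."""
    if int(seq_lens.max()) > TOPK:
        return None                      # long sequence present: run MQA + Top-K
    idx = torch.arange(TOPK).expand(len(seq_lens), -1).clone()
    idx[idx >= seq_lens[:, None]] = -1   # pad beyond each sequence's length
    return idx

print(dense_indices_or_none(torch.tensor([7, 5])))
```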
## How to Reproduce
This section provides the steps to reproduce our results on NVIDIA Blackwell B200 GPUs, covering both model accuracy evaluation and performance benchmarking.
The DeepSeek-V3.2 NVFP4 model is used for evaluation and benchmarking. You can follow [the Model Optimizer instructions](https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/deepseek#experimental-deepseek-v32) to quantize the original DeepSeek-V3.2 model to NVFP4.
### Accuracy Evaluation
Evaluate the model accuracy using trtllm-eval.
1. Prepare an advanced configuration file:
```
cat >./config.yml <<EOF
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
enable_attention_dp: true
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 1
EOF
```
2. Evaluate accuracy on the [MMLU](https://people.eecs.berkeley.edu/~hendrycks/data.tar) dataset:
```
model_path=<your model path>
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
mmlu
```
3. Evaluate accuracy on the [GSM8K](https://huggingface.co/datasets/openai/gsm8k) dataset:
```
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
gsm8k
```
4. Evaluate accuracy on the [GPQA-Diamond](https://huggingface.co/datasets/Idavidrein/gpqa) dataset:
```
trtllm-eval --model ${model_path} \
--tp_size 8 \
--ep_size 8 \
--kv_cache_free_gpu_memory_fraction 0.8 \
--config ./config.yml \
--custom_tokenizer deepseek_v32 \
gpqa_diamond \
--apply_chat_template \
--chat_template_kwargs '{"thinking": true}' \
--max_output_length 120000
```
### Benchmark on B200
#### Min-latency
Our benchmark results are based on Batch = 1, ISL = 8K, OSL = 1K, num_requests = 10 from a synthetic dataset.
To do the benchmark, run the following command:
```
data_path=<your dataset file following the format>
model_path=<your model path>
cat <<EOF > ./config.yml
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 3
EOF
trtllm-bench -m deepseek-ai/DeepSeek-V3.2-Exp \
--model_path ${model_path} throughput \
--tp 4 \
--warmup 1 \
--dataset ${data_path} \
--backend pytorch \
--max_batch_size 1 \
--max_num_tokens 8384 \
--kv_cache_free_gpu_mem_fraction 0.8 \
--concurrency 1 \
--config ./config.yml \
--num_requests 10 \
--streaming
```
The expected results:
```
===========================================================
= PERFORMANCE OVERVIEW
===========================================================
Request Throughput (req/sec): 0.2678
Total Output Throughput (tokens/sec): 274.1786
Total Token Throughput (tokens/sec): 2467.6070
Total Latency (ms): 37347.9238
Average request latency (ms): 3734.7334
Per User Output Throughput [w/ ctx] (tps/user): 276.2231
Per GPU Output Throughput (tps/gpu): 68.5446
Average time-to-first-token [TTFT] (ms): 425.9885
Average time-per-output-token [TPOT] (ms): 3.2344
Per User Output Speed (tps/user): 312.0708
```
<sub><em>\* Note that `max_num_tokens` is set to a large value to cover the maximum sequence length. Please refer to the [Best Performance Practices](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md#wip-enable-more-features-by-default) for more details on `max_num_tokens` configuration.</em></sub>
#### Max-throughput
Our benchmark results are based on Batch = 256, ISL = 8K, OSL = 1K, num_requests = 768 from a synthetic dataset.
To do the benchmark, run the following command:
```
data_path=<your dataset file following the format>
model_path=<your model path>
cat <<EOF > ./config.yml
cuda_graph_config:
enable_padding: true
batch_sizes: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128]
enable_attention_dp: true
kv_cache_config:
free_gpu_memory_fraction: 0.8
dtype: fp8
moe_config:
backend: TRTLLM
speculative_config:
decoding_type: MTP
num_nextn_predict_layers: 1
EOF
trtllm-bench -m deepseek-ai/DeepSeek-V3.2-Exp \
--model_path ${model_path} throughput \
--tp 8 \
--ep 8 \
--warmup 1 \
--dataset ${data_path} \
--backend pytorch \
--max_batch_size 256 \
--max_num_tokens 8576 \
--kv_cache_free_gpu_mem_fraction 0.8 \
--concurrency 256 \
--config ./config.yml \
--num_requests 768 \
--streaming
```
The expected results:
```
===========================================================
= PERFORMANCE OVERVIEW
===========================================================
Request Throughput (req/sec): 8.4162
Total Output Throughput (tokens/sec): 8618.2158
Total Token Throughput (tokens/sec): 77563.9425
Total Latency (ms): 365009.1921
Average request latency (ms): 120325.7013
Per User Output Throughput [w/ ctx] (tps/user): 9.8876
Per GPU Output Throughput (tps/gpu): 1077.2770
Average time-to-first-token [TTFT] (ms): 19537.7776
Average time-per-output-token [TPOT] (ms): 98.5219
Per User Output Speed (tps/user): 11.2591
```
### Benchmark with Wide-EP on GB200
To validate the efficacy of Wide-EP on DeepSeek-V3.2, we evaluated performance using the NVFP4 model on a GB200 NVL72 system. We compared EP16 and EP32 configurations against EP4 and EP8 baselines, with benchmarks conducted at ISL=8K and OSL=1K using the [Rate Matching](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/blogs/tech_blog/blog5_Disaggregated_Serving_in_TensorRT-LLM.md#measurement-methodology) methodology.
<div align="center">
<figure>
<img src="https://github.com/NVIDIA/TensorRT-LLM/raw/main/docs/source/blogs/media/tech_blog15_ds32_wide_ep.png" alt="tech_blog15_ds32_wide_ep" width="700" height="auto">
</figure>
</div>
<p align="center"><sub><em>Figure 4. DeepSeek-V3.2 throughput on ISL/OSL 8k/1k. Note that the numbers were collected on November 20th, and more optimizations are still on-going.</em></sub></p>
As illustrated in Figure 4, Wide-EP yields up to a 2.28x improvement in per-GPU output throughput. To reproduce these results, please refer to the [examples/wide_ep/slurm_scripts](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/wide_ep/slurm_scripts) directory. These scripts demonstrate how to launch disaggregated serving with large-scale EP and associated features on a SLURM cluster.
## Future Works
- Optimize performance for long-sequence scenarios (e.g., ISL=32K, OSL=4K).
- Optimize performance for large Expert Parallelism (EP) configurations.
- Evaluate dense MHA versus MQA modes for context sparse MLA to determine the optimal configuration for processing short sequences.
- Explore more aggressive quantization strategies for DSA.
- Optimize the implementation of the indexer Top-K kernel.
- Investigate KV cache offloading mechanisms for DSA.
## Acknowledgement
Achieving these remarkable performance gains since the release of DeepSeek-V3.2-Exp was truly a collaborative triumph. We extend our deepest gratitude to everyone who contributed to the functional implementation and performance optimization of the DeepSeek-V3.2 model.
This work serves as a testament to TensorRT LLM's flexibility and effectiveness in supporting architectural innovations and novel sparse attention mechanisms. We hope this work paves the way for further advancements in sparse attention support.

View File

@ -227,3 +227,7 @@ Run `bench.sh` to begin a serving benchmark. This will take a long time if you r
```shell
./bench.sh
```
## Known Issues
Qwen3-Next-80B-A3B exhibits relatively low accuracy on the SciCode-AA-v2 benchmark.

View File

@ -38,13 +38,14 @@ Note: Support for other models may vary. Features marked "N/A" are not applicabl
| `DeepseekV3ForCausalLM` | Yes | Yes | Yes | Yes | Yes [^1] | Yes | No | No | Yes | Yes | Yes [^2] | N/A | Yes | Yes |
| `DeepseekV32ForCausalLM` | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3MoeForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Yes | N/A | Yes | Yes |
| `Qwen3NextForCausalLM` | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
| `Qwen3NextForCausalLM` [^3] | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested |
| `Llama4ForConditionalGeneration` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Untested | N/A | Yes | Yes |
| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^3] | Yes | Yes | Yes | N/A | Yes | Yes |
| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^4] | Yes | Yes | Yes | N/A | Yes | Yes |
[^1]: Chunked Prefill for MLA can only be enabled on SM100/SM103.
[^2]: KV cache reuse for MLA can only be enabled on SM90/SM100/SM103 and in BF16/FP8 KV cache dtype.
[^3]: Overlap scheduler isn't supported when using EAGLE-3(Two Model Engine) for GPT-OSS.
[^3]: Qwen3-Next-80B-A3B exhibits relatively low accuracy on the SciCode-AA-v2 benchmark.
[^4]: Overlap scheduler isn't supported when using EAGLE-3(Two Model Engine) for GPT-OSS.
# Multimodal Feature Support Matrix (PyTorch Backend)

View File

@ -0,0 +1 @@
attn_backend: triton

View File

@ -65,7 +65,7 @@ models:
- name: bigcode/starcoder2-7b
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: bigcode/starcoder2-15b-instruct-v0.1
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'attn_backend_triton.yaml']
- name: deepseek-ai/DeepSeek-Prover-V1.5-SFT
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: deepseek-ai/DeepSeek-Prover-V2-7B
@ -118,8 +118,6 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-27b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
- name: google/gemma-3-2b-it
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
- name: deepseek-ai/DeepSeek-V2.5
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
# DISABLED: Network timeout downloading from Hugging Face
@ -145,8 +143,6 @@ models:
# DISABLED: Graph transformation error in auto-deploy
# - name: neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8
# yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: TheBloke/falcon-40b-instruct-GPTQ
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: google/gemma-2-27b-it
@ -159,7 +155,7 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/QwQ-32B-Preview
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'compile_backend_torch_cudagraph.yaml']
- name: Qwen/Qwen3-Coder-32B-Instruct
- name: Qwen/Qwen3-Coder-30B-A3B-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
- name: Qwen/Qwen3-235B-A22B-Instruct-2507
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
@ -222,3 +218,5 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_maverick_lite.yaml']
- name: nvidia/NVIDIA-Nemotron-3-Super-120B-BF16-BF16KV-010726
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml','super_v3.yaml']

View File

@ -10,10 +10,11 @@ import torch
import yaml
from tensorrt_llm._torch.autotuner import AutoTuner, autotune
from tensorrt_llm._torch.distributed import MPIDist, TorchDist
from tensorrt_llm._torch.modules.fused_moe.fused_moe_cutlass import CutlassFusedMoE
from tensorrt_llm._torch.modules.fused_moe.interface import AlltoallMethodType
from tensorrt_llm._torch.modules.multi_stream_utils import with_multi_stream
from tensorrt_llm._utils import local_mpi_rank, mpi_rank, mpi_world_size
from tensorrt_llm._utils import local_mpi_rank, mpi_disabled, mpi_rank, mpi_world_size
from tensorrt_llm.logger import logger
from tensorrt_llm.tools.layer_wise_benchmarks import BalanceMethod, get_runner_cls, mark_ranges
@ -173,6 +174,8 @@ run_pack = runner.create_run_pack(
)
if args.enable_autotuner:
cache_path = os.getenv("TLLM_AUTOTUNER_CACHE_PATH") or None
dist = TorchDist(mapping=mapping) if mpi_disabled() else MPIDist(mapping=mapping)
AutoTuner.get().setup_distributed_state(mapping, dist)
with autotune(cache_path=cache_path):
run_pack()
else:

View File

@ -1,9 +1,6 @@
# EXAONE
This document shows how to build and run a [EXAONE](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) model in TensorRT-LLM.
The TensorRT LLM EXAONE implementation is based on the LLaMA model. The implementation can be found in [llama/model.py](../../../../tensorrt_llm/models/llama/model.py).
See the LLaMA example [`examples/models/core/llama`](../llama) for details.
This document shows how to build and run [EXAONE](https://huggingface.co/LGAI-EXAONE) models in TensorRT-LLM.
- [EXAONE](#exaone)
- [Support Matrix](#support-matrix)
@ -11,31 +8,51 @@ See the LLaMA example [`examples/models/core/llama`](../llama) for details.
- [EXAONE-3.0](#exaone-30)
- [EXAONE-Deep](#exaone-deep)
- [EXAONE-4.0](#exaone-40)
- [Usage](#usage)
- [PyTorch flow](#pytorch-flow)
-[PyTorch flow Quantization](#pytorch-flow-quantization)
- [TRT Flow](#trt-flow)
- [K-EXAONE](#k-exaone)
- [PyTorch flow](#pytorch-flow)
- [Running EXAONE-4.0](#running-exaone-40)
- [Running K-EXAONE](#running-k-exaone)
- [MoE Backend Options](#moe-backend-options)
- [PyTorch flow Quantization](#pytorch-flow-quantization)
- [FP8 Quantization](#fp8-quantization)
- [NVFP4 Quantization](#nvfp4-quantization)
- [Running the TensorRT LLM Server](#running-the-tensorrt-llm-server)
- [Running Aggregated TensorRT LLM Server](#running-aggregated-tensorrt-llm-server)
- [Creating the Extra Options Configuration](#creating-the-extra-options-configuration)
- [Launch trtllm-serve OpenAI-compatible API server](#launch-trtllm-serve-openai-compatible-api-server)
- [Running Disaggregated TensorRT LLM Server](#running-disaggregated-tensorrt-llm-server)
- [Step 1: Set Environment Variables](#step-1-set-environment-variables)
- [Step 2: Create Configuration Files](#step-2-create-configuration-files)
- [Step 3: Launch the Disaggregated Server](#step-3-launch-the-disaggregated-server)
- [TRT flow](#trt-flow)
- [Convert checkpoint and build TensorRT engine(s)](#convert-checkpoint-and-build-tensorrt-engines)
- [FP8 Post-Training Quantization](#fp8-post-training-quantization)
- [SmoothQuant](#smoothquant)
- [Groupwise quantization (AWQ)](#groupwise-quantization-awq)
- [W4A16 AWQ with FP8 GEMM (W4A8 AWQ)](#w4a16-awq-with-fp8-gemm-w4a8-awq)
- [W4A16 AWQ with FP8 GEMM (W4A8 AWQ)](#w4a16-awq-with-fp8-gemm-w4a8-awq)
- [Run Engine](#run-engine)
- [Troubleshootings](#troubleshootings)
- [Troubleshootings for EXAONE-4.0](#troubleshootings-for-exaone-40)
- [Troubleshootings for K-EXAONE](#troubleshootings-for-k-exaone)
## Support Matrix
* FP16
* BF16
* Tensor Parallel
* Tensor Parallel (TP)
* Expert Parallel (EP) (K-EXAONE only)
* Attention Data Parallel (ADP) (K-EXAONE only)
* Disaggregated Serving
* FP8
* INT8 & INT4 Weight-Only
* INT8 SmoothQuant
* INT4 AWQ & W4A8 AWQ
* NVFP4 (K-EXAONE only)
## Supported Models
**Note:**
- **EXAONE-3.0** and **EXAONE-Deep** are supported using the [TRT Flow](#trt-flow).
- **EXAONE-4.0** is supported using the [PyTorch flow](#pytorch-flow).
**Note:**
- **EXAONE-3.0** & **EXAONE-Deep** are supported using the [TRT Flow](#trt-flow).
- **EXAONE-4.0** & **K-EXAONE** are supported using the [PyTorch flow](#pytorch-flow).
Please refer to the corresponding sections below for usage instructions and examples for each model.
@ -59,23 +76,33 @@ git clone https://huggingface.co/LGAI-EXAONE/EXAONE-Deep-2.4B $HF_MODEL_DIR
### EXAONE-4.0
Download he HuggingFace checkpoints of EXAONE-4.0 model. Here, we only use the `EXAONE-4.0-32B` model for the example. From EXAONE-4.0 model, we support only on PyTorch flow.
Download the HuggingFace checkpoints of the EXAONE-4.0 model. Here, we use the `EXAONE-4.0-32B` model as an example. EXAONE-4.0 is supported only via the PyTorch flow.
```bash
export HF_MODEL_DIR=hf_models/exaone4
git clone https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B $HF_MODEL_DIR
```
### Pytorch flow
### K-EXAONE
K-EXAONE is a Mixture of Experts (MoE) model based on the EXAONE architecture. It features a hybrid architecture with both dense and MoE layers and sliding window attention, and it supports FP8 and NVFP4 quantization for efficient inference.
Download the HuggingFace checkpoints of the K-EXAONE model:
```bash
export HF_MODEL_DIR=hf_models/kexaone
git clone https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B $HF_MODEL_DIR
```
## PyTorch flow
### Running EXAONE-4.0
To quickly run EXAONE-4.0 models, you can use [examples/llm-api/quickstart_advanced.py](../../../llm-api/quickstart_advanced.py):
```bash
python ../../../llm-api/quickstart_advanced.py --model_dir hf_models/$MODEL_NAME --disable_kv_cache_reuse
python ../../../llm-api/quickstart_advanced.py --model_dir $HF_MODEL_DIR
```
SWA currently does not support kv_cache_reuse. Please make sure to disable KV cache reuse when running with SWA.
The output will be like:
```bash
[0] Prompt: 'Hello, my name is', Generated text: " [Your Name], and I'm a [Your Profession]. I'm here to learn and share with you.\n\nBest regards,\n[Your Name]\n\nThis letter is concise, professional, and clearly states who you are and what you're here for. It's a good starting point"
@ -83,47 +110,239 @@ The output will be like:
[2] Prompt: 'The future of AI is', Generated text: ' not just about technology but also about how we choose to use it. We must ensure that AI is developed and deployed in a way that benefits all of humanity, not just a select few. This means prioritizing ethical considerations, transparency, and accountability in AI development. It also means involving diverse stakeholders in the conversation about AI'
```
#### PyTorch flow Quantization
### Running K-EXAONE
For PyTorch flow, TRT-LLM supports quantized format generated by [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
You can either do pre-quantized models in HF model hub, or can generate quantized model by yourself and then run models with below command:
K-EXAONE is a Mixture of Experts model that benefits from multiple parallelism strategies. You can run it with tensor parallelism (TP), expert parallelism (EP), and attention data parallelism (ADP):
```bash
git clone https://github.com/NVIDIA/Model-Optimizer.git
python ../../../llm-api/quickstart_advanced.py \
--model_dir $HF_MODEL_DIR \
--tp_size 8 \
--moe_ep_size 8 \
--enable_attention_dp \
--trust_remote_code
```
The output will be like:
```bash
[0] Prompt: 'Hello, my name is', Generated text: ' John Smith, and I am a 28-year-old software developer. I live in the city of San Francisco, California. I work remotely for a tech startup based in Austin, Texas.\n\nI enjoy hiking, reading, and playing the piano. In my free time, I often explore new neighborhoods in San Francisco, trying out new restaurants and cafes.\n\n'
[1] Prompt: 'The capital of France is', Generated text: ' Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris, the capital of France is Paris'
[2] Prompt: 'The future of AI is', Generated text: ' bright.\n</think>\n\nThe future of AI holds immense promise across numerous domains. In healthcare, AI is revolutionizing diagnostics, drug discovery, and personalized treatment plans. In education, AI is enabling adaptive learning platforms that cater to individual learning styles and paces. In environmental science, AI is playing a pivotal role in addressing climate change by optimizing'
```
#### MoE Backend Options
K-EXAONE supports the following MoE backends:
| Backend | Description |
|---------|-------------|
| `CUTLASS` | Default backend, optimized for general use cases |
| `TRTLLM` | TensorRT-LLM backend using TRT-LLM Gen kernels, optimized for low-latency inference |
| `WIDEEP` | Wide expert parallelism backend for cases where EP size exceeds the number of experts |
You can specify the MoE backend using the `--moe_backend` argument:
```bash
python ../../../llm-api/quickstart_advanced.py \
--model_dir $HF_MODEL_DIR \
--tp_size 8 \
--moe_ep_size 8 \
--enable_attention_dp \
--moe_backend CUTLASS \
--trust_remote_code
```
### PyTorch flow Quantization
For PyTorch flow, TRT-LLM supports quantized formats generated by [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer). You can either use pre-quantized models from the HuggingFace model hub, or generate quantized models yourself using the instructions below.
First, clone the [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) repository:
```bash
git clone https://github.com/NVIDIA/Model-Optimizer
cd Model-Optimizer/examples/llm_ptq
scripts/huggingface_example.sh --model hf_models/$MODEL_NAME --quant fp8 --export_fmt hf
```
For more information, please refer to official [docs](https://github.com/NVIDIA/Model-Optimizer) or [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer).
For more information, please refer to the official [Model Optimizer documentation](https://github.com/NVIDIA/Model-Optimizer).
Troubleshooting
#### FP8 Quantization
FP8 quantization provides a good balance between model accuracy and inference performance. To quantize a model to FP8 format:
The following error may occur during quantization:
```bash
torch._dynamo.exc.Unsupported: Graph break under GenericContextWrappingVariable
Explanation: Attempted to graph break in an active context manager(s) that doesn't support graph breaking.
Hint: Move the offending context manager(s) to outside the compiled region.
Hint: This graph break may have been caused by an earlier graph break. Resolving the earlier graph break may resolve this one.
python3 hf_ptq.py --model $HF_MODEL_DIR --quant fp8 --export_fmt hf
```
This error may indicate an incompatibility between `torch.compile()` and the `HybridCache` module of the transformers library. As a result, [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (ModelOpt) cannot perform PTQ with HybridCache.
#### NVFP4 Quantization
Temporarily switching to `DynamicCache` when creating PTQ models could help address the issue. This can be done by updating the `cache_implementation` field in the `generation_config.json` file located in the model checkpoint directory, for example:
```json
# generation_config.json
{
// Change "hybrid" to "dynamic" to run PTQ.
// Revert this to "hybrid" after quantization is complete.
"cache_implementation": "hybrid",
...
}
NVFP4 (4-bit floating point) quantization enables memory-efficient inference with reduced GPU memory footprint. To quantize a model to NVFP4 format:
```bash
python3 hf_ptq.py --model $HF_MODEL_DIR --quant nvfp4 --export_fmt hf
```
For models with sliding window attention, DynamicCache is less memory-efficient than HybridCache because it retains the entire key-value cache. However, this does not break the model's attention logic, as the cache implementation is separated from the attention computation itself. This trade-off is acceptable for the PTQ process, which is a one-time procedure. Our tests confirm that this workaround does not degrade accuracy on MMLU or GSM8K benchmarks with the default ModelOpt settings.
### TRT flow
## Running the TensorRT LLM Server
The next section describe how to convert the weights from the [HuggingFace (HF) Transformers](https://github.com/huggingface/transformers) format to the TensorRT LLM format. We will use llama's [convert_checkpoint.py](../llama/convert_checkpoint.py) for EXAONE model and then we build the model with `trtllm-build`.
This section describes how to deploy the K-EXAONE model using the TensorRT LLM server with an OpenAI-compatible API endpoint.
Make sure `HF_MODEL_DIR` points to your EXAONE checkpoint directory.
The examples in this section are intended as a minimal, runnable demonstration and are not fully performance-optimized. For more features and performance tuning, please refer to the documents below.
- [Disaggregated Serving examples](../../../disaggregated/README.md)
- [Disaggregated Serving feature guide](../../../../docs/source/features/disagg-serving.md)
- [Recommended LLM API configuration settings](../../../configs/README.md) (see also `examples/configs/curated/`)
### Running Aggregated TensorRT LLM Server
The aggregated server runs all components (context and generation phases) on the same set of GPUs, which is suitable for single-node deployments.
#### Creating the Extra Options Configuration
Create a YAML configuration file to specify advanced options such as attention data parallelism, CUDA graph settings, and MoE backend configuration:
```bash
cat <<EOF > configs.yaml
enable_attention_dp: true
trust_remote_code: true
cuda_graph_config:
max_batch_size: 2048
enable_padding: true
moe_config:
backend: CUTLASS # The TRTLLM backend is recommended for the Blackwell architecture.
kv_cache_config:
enable_block_reuse: true # Please disable the block reuse feature when conducting performance benchmarking.
max_attention_window: [128, 128, 128, 131072] # This allows KV cache manager to possibly improve memory efficiency.
free_gpu_memory_fraction: 0.9
dtype: "auto"
attention_dp_config:
enable_balance: true
batching_wait_iters: 50
timeout_iters: 1
num_postprocess_workers: 4 # Can mitigate the postprocessing overhead (e.g. detokenization)
EOF
```
#### Launch trtllm-serve OpenAI-compatible API server
Start the server using `trtllm-serve` with the PyTorch backend. This launches an OpenAI-compatible API server that can handle chat completions and text generation requests:
```bash
trtllm-serve \
$HF_MODEL_DIR \
--host localhost \
--port 8000 \
--backend pytorch \
--max_batch_size 2048 \
--max_num_tokens 8192 \
--tp_size 8 \
--ep_size 8 \
--pp_size 1 \
--config ./configs.yaml
```
Once the server is running, you can send requests to `http://localhost:8000/v1/completions` using the OpenAI API format.
### Running Disaggregated TensorRT LLM Server
Disaggregated serving separates the context (prefill) and generation (decode) phases onto different GPU sets, enabling better resource utilization and improved throughput. This example demonstrates a single-node disaggregated deployment using 8 GPUs (4 for context, 4 for generation). For more details, see the [Disaggregated Serving documentation](../../../disaggregated/README.md).
#### Step 1: Set Environment Variables
Configure the parallelism and buffer settings:
```bash
# Buffer size for KV cache transfer between context and generation servers
export MAX_TOKENS_IN_BUFFER=8192
# Model parallelism configuration
export TP_SIZE=4
export MOE_EP_SIZE=4
export ENABLE_ATTENTION_DP=true
```
#### Step 2: Create Configuration Files
**Context server configuration (`ctx_extra-llm-api-config.yaml`):**
```bash
cat > ctx_extra-llm-api-config.yaml << EOF
backend: pytorch
trust_remote_code: true
disable_overlap_scheduler: true
enable_chunked_prefill: true
tensor_parallel_size: $TP_SIZE
moe_expert_parallel_size: $MOE_EP_SIZE
pipeline_parallel_size: 1
enable_attention_dp: $ENABLE_ATTENTION_DP
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: $MAX_TOKENS_IN_BUFFER
EOF
```
**Generation server configuration (`gen_extra-llm-api-config.yaml`):**
```bash
cat > gen_extra-llm-api-config.yaml << EOF
backend: pytorch
trust_remote_code: true
disable_overlap_scheduler: false
enable_chunked_prefill: true
tensor_parallel_size: $TP_SIZE
moe_expert_parallel_size: $MOE_EP_SIZE
pipeline_parallel_size: 1
enable_attention_dp: $ENABLE_ATTENTION_DP
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: $MAX_TOKENS_IN_BUFFER
EOF
```
**Disaggregated orchestrator configuration (`disagg_config.yaml`):**
```bash
cat > disagg_config.yaml << EOF
hostname: localhost
port: 8000
backend: pytorch
context_servers:
num_instances: 1
urls:
- "localhost:8001"
generation_servers:
num_instances: 1
urls:
- "localhost:8002"
EOF
```
#### Step 3: Launch the Disaggregated Server
Start all components in the following order:
```bash
# 1. Start context server (GPUs 0-3)
CUDA_VISIBLE_DEVICES=0,1,2,3 trtllm-serve $HF_MODEL_DIR \
--host localhost --port 8001 --enable_chunked_prefill \
--extra_llm_api_options ./ctx_extra-llm-api-config.yaml &> log_ctx.log &
# 2. Start generation server (GPUs 4-7)
CUDA_VISIBLE_DEVICES=4,5,6,7 trtllm-serve $HF_MODEL_DIR \
--host localhost --port 8002 --enable_chunked_prefill \
--extra_llm_api_options ./gen_extra-llm-api-config.yaml &> log_gen.log &
# 3. Start disaggregated orchestrator
trtllm-serve disaggregated -c disagg_config.yaml -t 360 -r 1200 &> log_disagg.log &
```
Once all servers are running, you can send requests to `http://localhost:8000/v1/completions` using the OpenAI API format.
## TRT flow
The next section describes how to convert weights from the [HuggingFace (HF) Transformers](https://github.com/huggingface/transformers) format to the TensorRT LLM format. We will use LLaMA's [convert_checkpoint.py](../llama/convert_checkpoint.py) for EXAONE models and then build the model with `trtllm-build`.
### Convert checkpoint and build TensorRT engine(s)
@ -141,7 +360,7 @@ trtllm-build \
--output_dir trt_engines/exaone/fp16/1-gpu \
--gemm_plugin auto
# Build the EXAONE model using a single GPU and and apply INT8 weight-only quantization.
# Build the EXAONE model using a single GPU and apply INT8 weight-only quantization.
python ../llama/convert_checkpoint.py \
--model_dir $HF_MODEL_DIR \
--output_dir trt_models/exaone/int8_wq/1-gpu \
@ -154,7 +373,7 @@ trtllm-build \
--output_dir trt_engines/exaone/int8_wq/1-gpu \
--gemm_plugin auto
# Build the EXAONE model using a single GPU and and apply INT4 weight-only quantization.
# Build the EXAONE model using a single GPU and apply INT4 weight-only quantization.
python ../llama/convert_checkpoint.py \
--model_dir $HF_MODEL_DIR \
--output_dir trt_models/exaone/int4_wq/1-gpu \
@ -183,18 +402,18 @@ trtllm-build \
### FP8 Post-Training Quantization
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply FP8 quantization.
# Build the EXAONE model using a single GPU and apply FP8 quantization.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
--qformat fp8 \
--kv_cache_dtype fp8 \
--output_dir trt_models/exaone/fp8/1-gpu \
--output_dir trt_models/exaone/fp8/1-gpu
trtllm-build \
--checkpoint_dir trt_models/exaone/fp8/1-gpu \
@ -204,12 +423,12 @@ trtllm-build \
### SmoothQuant
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply INT8 SmoothQuant.
# Build the EXAONE model using a single GPU and apply INT8 SmoothQuant.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -224,12 +443,12 @@ trtllm-build \
### Groupwise quantization (AWQ)
The examples below uses the NVIDIA Modelopt (AlgorithMic Model Optimization) toolkit for the model quantization process.
The examples below use the NVIDIA ModelOpt (AlgorithMic Model Optimization) toolkit for the model quantization process.
First make sure Modelopt toolkit is installed (see [examples/quantization/README.md](/examples/quantization/README.md#preparation))
```bash
# Build the EXAONE model using a single GPU and and apply INT4 AWQ.
# Build the EXAONE model using a single GPU and apply INT4 AWQ.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -248,7 +467,7 @@ For Hopper GPUs, TRT-LLM also supports employing FP8 GEMM for accelerating linea
Please make sure your system contains a Hopper GPU before trying the commands below.
```bash
# Build the EXAONE model using a single GPU and and apply W4A8 AWQ.
# Build the EXAONE model using a single GPU and apply W4A8 AWQ.
python ../../../quantization/quantize.py \
--model_dir $HF_MODEL_DIR \
--dtype float16 \
@@ -287,4 +506,50 @@ python ../../../summarize.py \
--engine_dir trt_engines/exaone/fp16/1-gpu
```
For more examples see [`examples/models/core/llama/README.md`](../llama/README.md)
For more examples regarding EXAONE-3.0 & EXAONE-Deep's TRT flow, see [`examples/models/core/llama/README.md`](../llama/README.md)
## Troubleshooting
### Troubleshooting for EXAONE-4.0
The following error may occur during quantization:
```bash
torch._dynamo.exc.Unsupported: Graph break under GenericContextWrappingVariable
Explanation: Attempted to graph break in an active context manager(s) that doesn't support graph breaking.
Hint: Move the offending context manager(s) to outside the compiled region.
Hint: This graph break may have been caused by an earlier graph break. Resolving the earlier graph break may resolve this one.
```
This error may indicate an incompatibility between `torch.compile()` and the `HybridCache` module of the transformers library. As a result, [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (ModelOpt) cannot perform PTQ with HybridCache.
Temporarily switching to `DynamicCache` when creating PTQ models could help address the issue. This can be done by updating the `cache_implementation` field in the `generation_config.json` file located in the model checkpoint directory, for example:
```json
// generation_config.json
{
// Change "hybrid" to "dynamic" to run PTQ.
// Revert this to "hybrid" after quantization is complete.
"cache_implementation": "hybrid",
...
}
```
For models with sliding window attention, DynamicCache is less memory-efficient than HybridCache because it retains the entire key-value cache. However, this does not break the model's attention logic, as the cache implementation is separated from the attention computation itself. This trade-off is acceptable for the PTQ process, which is a one-time procedure. Our tests confirm that this workaround does not degrade accuracy on MMLU or GSM8K benchmarks with the default ModelOpt settings.
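If you prefer not to edit the file by hand, a minimal sketch like the following can automate the toggle (this assumes `jq` is installed and `$HF_MODEL_DIR` points to the checkpoint directory):
```bash
# Back up the original config, then switch the cache implementation for PTQ.
cp $HF_MODEL_DIR/generation_config.json $HF_MODEL_DIR/generation_config.json.bak
jq '.cache_implementation = "dynamic"' $HF_MODEL_DIR/generation_config.json.bak \
    > $HF_MODEL_DIR/generation_config.json
# After quantization completes, restore the original setting:
# mv $HF_MODEL_DIR/generation_config.json.bak $HF_MODEL_DIR/generation_config.json
```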
### Troubleshooting for K-EXAONE
K-EXAONE is a Mixture of Experts (MoE) model that activates 8 experts per token. When too few tokens are provided during PTQ, some experts in some layers may never be activated and will not produce properly calibrated weights.
To address this issue, provide enough data samples during calibration by increasing the `calib_size` and `calib_seq` parameters:
**FP8 Quantization:**
```bash
cd Model-Optimizer/examples/llm_ptq
python3 hf_ptq.py --model hf_models/$MODEL_NAME --quant fp8 --export_fmt hf --calib_size 8192 --calib_seq 1024
```
**NVFP4 Quantization:**
```bash
cd Model-Optimizer/examples/llm_ptq
python3 hf_ptq.py --model hf_models/$MODEL_NAME --quant nvfp4 --export_fmt hf --calib_size 8192 --calib_seq 1024
```
View File
@@ -10,7 +10,7 @@ tiktoken
einops
# optional dependencies
gradio==4.44.1
gradio==5.4.0
mdtex2html
sse_starlette
aiohttp_sse_client
View File
@@ -1155,7 +1155,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
export pytestCommand="$pytestCommand"
export coverageConfigFile="$coverageConfigFile"
export NVIDIA_IMEX_CHANNELS=\${NVIDIA_IMEX_CHANNELS:-0}
export NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=noheader)-1)))}
export NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=csv,noheader)-1)))}
${envExportStatements}
echo "Env NVIDIA_IMEX_CHANNELS: \$NVIDIA_IMEX_CHANNELS"
@@ -3249,10 +3249,12 @@ def launchTestJobs(pipeline, testFilter)
fullSet = parallelJobs.keySet()
x86SlurmTestConfigs = [
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 2, 2],
"DGX_H100-2_GPUs-PyTorch-Others-2": ["dgx-h100-x2-oci", "l0_dgx_h100", 2, 2, 2],
"DGX_H100-2_GPUs-PyTorch-GptOss-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-2_GPUs-PyTorch-Ray-1": ["dgx-h100-x2-oci", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 2, 4],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4-oci", "l0_dgx_h100", 2, 2, 4],
"DGX_H100-4_GPUs-PyTorch-GptOss-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-PyTorch-Ray-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
View File
@@ -0,0 +1,111 @@
@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _
import java.lang.InterruptedException
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202510291120-8621"
// LLM repository configuration
withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LLM_REPO')]) {
LLM_REPO = env.gitlabSourceRepoHttpUrl ? env.gitlabSourceRepoHttpUrl : "${DEFAULT_LLM_REPO}"
}
LLM_ROOT = "llm"
def createKubernetesPodConfig(image, arch = "amd64")
{
def archSuffix = arch == "arm64" ? "arm" : "amd"
def jnlpImage = "urm.nvidia.com/sw-ipp-blossom-sre-docker-local/lambda/custom_jnlp_images_${archSuffix}_linux:jdk17"
def podConfig = [
cloud: "kubernetes-cpu",
namespace: "sw-tensorrt",
yaml: """
apiVersion: v1
kind: Pod
spec:
nodeSelector:
nvidia.com/node_type: builder
kubernetes.io/os: linux
containers:
- name: trt-llm
image: ${image}
command: ['cat']
volumeMounts:
- name: sw-tensorrt-pvc
mountPath: "/mnt/sw-tensorrt-pvc"
readOnly: false
tty: true
resources:
requests:
cpu: 2
memory: 5Gi
ephemeral-storage: 25Gi
limits:
cpu: 2
memory: 5Gi
ephemeral-storage: 25Gi
imagePullPolicy: Always
- name: jnlp
image: ${jnlpImage}
args: ['\$(JENKINS_SECRET)', '\$(JENKINS_NAME)']
resources:
requests:
cpu: '2'
memory: 5Gi
ephemeral-storage: 25Gi
limits:
cpu: '2'
memory: 5Gi
ephemeral-storage: 25Gi
qosClass: Guaranteed
volumes:
- name: sw-tensorrt-pvc
persistentVolumeClaim:
claimName: sw-tensorrt-pvc
""".stripIndent(),
]
return podConfig
}
pipeline {
agent {
kubernetes createKubernetesPodConfig(DOCKER_IMAGE)
}
options {
timestamps()
}
environment {
OPEN_SEARCH_DB_BASE_URL=credentials("open_search_db_base_url")
OPEN_SEARCH_DB_CREDENTIALS=credentials("open_search_db_credentials")
}
parameters {
string(name: "BRANCH", defaultValue: "main", description: "Branch to checkout.")
string(name: "OPEN_SEARCH_PROJECT_NAME", defaultValue: "swdl-trtllm-infra-ci-prod-perf_sanity_info", description: "OpenSearch project name.")
string(name: "OPERATION", defaultValue: "SLACK BOT SENDS MESSAGE", description: "Operation to perform.")
string(name: "QUERY_JOB_NUMBER", defaultValue: "1", description: "Number of latest jobs to query.")
string(name: "SLACK_CHANNEL_ID", defaultValue: "C0A7D0LCA1F", description: "Slack channel IDs to send messages to.")
string(name: "SLACK_BOT_TOKEN", defaultValue: "", description: "Slack bot token for authentication.")
}
stages {
stage("Run Perf Sanity Script") {
steps {
container("trt-llm") {
script {
sh "pwd && ls -alh"
sh "env | sort"
trtllm_utils.checkoutSource(LLM_REPO, params.BRANCH, LLM_ROOT, false, false)
sh "pip install slack_sdk"
sh """
cd ${LLM_ROOT}/jenkins/scripts/perf && ls -alh && python3 perf_sanity_triage.py \
--project_name "${params.OPEN_SEARCH_PROJECT_NAME}" \
--operation "${params.OPERATION}" \
--channel_id "${params.SLACK_CHANNEL_ID}" \
--bot_token "${params.SLACK_BOT_TOKEN}" \
--query_job_number "${params.QUERY_JOB_NUMBER}"
"""
}
}
}
} // stage Run Perf Sanity Script
} // stages
} // pipeline
View File
@@ -0,0 +1,251 @@
#!/usr/bin/env python3
import argparse
import json
import sys
import time
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
sys.path.insert(0, sys.path[0] + "/..")
from open_search_db import OpenSearchDB
QUERY_LOOKBACK_DAYS = 90
MAX_QUERY_SIZE = 3000
MAX_TEST_CASES_PER_MSG = 5
POST_SLACK_MSG_RETRY_TIMES = 5
def query_regression_data(project_name):
"""Query regression data from OpenSearch database."""
last_days = QUERY_LOOKBACK_DAYS
must_clauses = [
{"term": {"b_is_valid": True}},
{"term": {"b_is_post_merge": True}},
{"term": {"b_is_regression": True}},
{"term": {"b_is_baseline": False}},
{
"range": {
"ts_created": {
"gte": int(time.time() - 24 * 3600 * last_days)
// (24 * 3600)
* 24
* 3600
* 1000,
}
}
},
]
json_data = {
"query": {
"bool": {"must": must_clauses},
},
"size": MAX_QUERY_SIZE,
}
json_data = json.dumps(json_data)
data_list = []
try:
res = OpenSearchDB.queryFromOpenSearchDB(json_data, project_name)
if res is None:
print(f"Failed to query from {project_name}, returned no response")
return None
payload = res.json().get("hits", {}).get("hits", [])
if len(payload) == 0:
print(f"No regression data found in {project_name}, returned empty list")
return []
for hit in payload:
data_dict = hit.get("_source", {})
data_dict["_id"] = hit.get("_id", "")
if data_dict["_id"] == "":
print(f"Failed to query from {project_name}, returned data with no _id")
return None
data_list.append(data_dict)
print(f"Successfully queried from {project_name}, queried {len(data_list)} entries")
return data_list
except Exception as e:
print(f"Failed to query from {project_name}, returned error: {e}")
return None
def get_regression_data_by_job_id(data_list, query_job_number):
"""Returns a dict with job_id as key and list of regression data as value.
Only returns the latest query_job_number jobs.
"""
if data_list is None or len(data_list) == 0:
return {}
# Group data by job_id
job_data_dict = {}
for data in data_list:
job_id = data.get("s_job_id", "")
if job_id == "":
continue
if job_id not in job_data_dict:
job_data_dict[job_id] = []
job_data_dict[job_id].append(data)
# Sort job_ids by the latest ts_created in each group (descending)
def get_latest_timestamp(job_id):
timestamps = [d.get("ts_created", 0) for d in job_data_dict[job_id]]
return max(timestamps) if timestamps else 0
sorted_job_ids = sorted(job_data_dict.keys(), key=get_latest_timestamp, reverse=True)
# Only keep the latest query_job_number jobs
latest_job_ids = sorted_job_ids[:query_job_number]
result = {}
for job_id in latest_job_ids:
result[job_id] = job_data_dict[job_id]
return result
def process_regression_message(regression_dict):
"""Process regression data into message chunks.
Returns a list of messages, each containing at most MAX_TEST_CASES_PER_MSG test cases.
"""
if not regression_dict:
return []
# Flatten all test cases into a list with (job_id, idx, data) tuples
all_test_cases = []
for job_id, data_list in regression_dict.items():
sorted_data_list = sorted(data_list, key=lambda x: x.get("s_test_case_name", ""))
for idx, data in enumerate(sorted_data_list, start=1):
all_test_cases.append((job_id, idx, data))
# Split into chunks of MAX_TEST_CASES_PER_MSG
chunks = []
for i in range(0, len(all_test_cases), MAX_TEST_CASES_PER_MSG):
chunks.append(all_test_cases[i : i + MAX_TEST_CASES_PER_MSG])
# Build messages for each chunk
messages = []
for chunk in chunks:
msg_parts = []
current_job_id = None
for job_id, idx, data in chunk:
# Add job header when switching to a new job_id
if job_id != current_job_id:
if msg_parts:
msg_parts.append("\n")
job_header = f"*LLM/main/L0_PostMerge/{job_id}:*\n"
msg_parts.append(job_header)
current_job_id = job_id
test_case_name = data.get("s_test_case_name", "N/A")
regression_info = data.get("s_regression_info", "N/A")
msg_parts.append(f"*REGRESSION TEST CASE {idx}: {test_case_name}*\n")
for part in regression_info.split(","):
part = part.strip()
if part and "baseline_id" not in part:
msg_parts.append(f" {part}\n")
msg = "".join(msg_parts).strip()
messages.append(msg)
return messages
def send_regression_message(messages, channel_id, bot_token):
"""Send regression messages to Slack channel(s).
channel_id can be a single ID or multiple IDs separated by commas.
"""
if not messages:
print("No regression data to send")
return
if channel_id and bot_token:
channel_ids = [cid.strip() for cid in channel_id.split(",") if cid.strip()]
for cid in channel_ids:
for msg in messages:
send_message(msg, cid, bot_token)
else:
print("Slack channel_id or bot_token not provided, printing message:")
for i, msg in enumerate(messages, start=1):
print(f"--- Message {i} ---")
print(msg)
def send_message(msg, channel_id, bot_token):
"""Send message to Slack channel using slack_sdk."""
client = WebClient(token=bot_token)
attachments = [
{
"title": "Perf Sanity Regression Report",
"color": "#ff0000",
"text": msg,
}
]
for attempt in range(1, POST_SLACK_MSG_RETRY_TIMES + 1):
try:
result = client.chat_postMessage(
channel=channel_id,
attachments=attachments,
)
assert result["ok"] is True, json.dumps(result.data)
print(f"Message sent successfully to channel {channel_id}")
return
except SlackApiError as e:
print(
f"Attempt {attempt}/{POST_SLACK_MSG_RETRY_TIMES}: Error sending message to Slack: {e}"
)
except Exception as e:
print(f"Attempt {attempt}/{POST_SLACK_MSG_RETRY_TIMES}: Unexpected error: {e}")
if attempt < POST_SLACK_MSG_RETRY_TIMES:
time.sleep(1)
print(
f"Failed to send message to channel {channel_id} after {POST_SLACK_MSG_RETRY_TIMES} attempts"
)
def main():
parser = argparse.ArgumentParser(description="Perf Sanity Triage Script")
parser.add_argument("--project_name", type=str, required=True, help="OpenSearch project name")
parser.add_argument("--operation", type=str, required=True, help="Operation to perform")
parser.add_argument(
"--channel_id",
type=str,
default="",
help="Slack channel ID(s), comma-separated for multiple channels",
)
parser.add_argument("--bot_token", type=str, default="", help="Slack bot token")
parser.add_argument(
"--query_job_number", type=int, default=1, help="Number of latest jobs to query"
)
args = parser.parse_args()
print(f"Project Name: {args.project_name}")
print(f"Operation: {args.operation}")
print(f"Channel ID: {args.channel_id}")
print(f"Bot Token: {'***' if args.bot_token else 'Not provided'}")
print(f"Query Job Number: {args.query_job_number}")
if args.operation == "SLACK BOT SENDS MESSAGE":
data_list = query_regression_data(args.project_name)
if data_list is None:
print("Failed to query regression data")
return
regression_dict = get_regression_data_by_job_id(data_list, args.query_job_number)
messages = process_regression_message(regression_dict)
send_regression_message(messages, args.channel_id, args.bot_token)
else:
print(f"Unknown operation: {args.operation}")
if __name__ == "__main__":
main()
View File
@@ -37,3 +37,4 @@ opentelemetry-exporter-otlp>=1.26.0
opentelemetry-semantic-conventions-ai>=0.4.1
fuzzywuzzy==0.18.0
aiperf==0.3.0
nanobind>=2.9.0
View File
@@ -150,53 +150,58 @@ testing = ["filelock"]
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
View File
@@ -1119,53 +1119,58 @@ test = ["pytest"]
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
View File
@@ -263,13 +263,13 @@ files = [
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]

View File
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -2203,13 +2203,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
@@ -3705,13 +3705,13 @@ files = [
[[package]]
name = "werkzeug"
version = "3.1.4"
version = "3.1.5"
description = "The comprehensive WSGI web application library."
optional = false
python-versions = ">=3.9"
files = [
{file = "werkzeug-3.1.4-py3-none-any.whl", hash = "sha256:2ad50fb9ed09cc3af22c54698351027ace879a0b60a3b5edf5730b2f7d876905"},
{file = "werkzeug-3.1.4.tar.gz", hash = "sha256:cd3cd98b1b92dc3b7b3995038826c68097dcb16f9baa63abe35f20eafeb9fe5e"},
{file = "werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc"},
{file = "werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -552,13 +552,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -1970,13 +1970,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -499,13 +499,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -781,13 +781,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
View File
@@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"


@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]


@ -567,13 +567,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -881,13 +881,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1955,21 +1955,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2396,80 +2396,80 @@ test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis
[[package]]
name = "scipy"
version = "1.16.3"
version = "1.17.0"
description = "Fundamental algorithms for scientific computing in Python"
optional = false
python-versions = ">=3.11"
files = [
{file = "scipy-1.16.3-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:40be6cf99e68b6c4321e9f8782e7d5ff8265af28ef2cd56e9c9b2638fa08ad97"},
{file = "scipy-1.16.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8be1ca9170fcb6223cc7c27f4305d680ded114a1567c0bd2bfcbf947d1b17511"},
{file = "scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bea0a62734d20d67608660f69dcda23e7f90fb4ca20974ab80b6ed40df87a005"},
{file = "scipy-1.16.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2a207a6ce9c24f1951241f4693ede2d393f59c07abc159b2cb2be980820e01fb"},
{file = "scipy-1.16.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:532fb5ad6a87e9e9cd9c959b106b73145a03f04c7d57ea3e6f6bb60b86ab0876"},
{file = "scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0151a0749efeaaab78711c78422d413c583b8cdd2011a3c1d6c794938ee9fdb2"},
{file = "scipy-1.16.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7180967113560cca57418a7bc719e30366b47959dd845a93206fbed693c867e"},
{file = "scipy-1.16.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:deb3841c925eeddb6afc1e4e4a45e418d19ec7b87c5df177695224078e8ec733"},
{file = "scipy-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:53c3844d527213631e886621df5695d35e4f6a75f620dca412bcd292f6b87d78"},
{file = "scipy-1.16.3-cp311-cp311-win_arm64.whl", hash = "sha256:9452781bd879b14b6f055b26643703551320aa8d79ae064a71df55c00286a184"},
{file = "scipy-1.16.3-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81fc5827606858cf71446a5e98715ba0e11f0dbc83d71c7409d05486592a45d6"},
{file = "scipy-1.16.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c97176013d404c7346bf57874eaac5187d969293bf40497140b0a2b2b7482e07"},
{file = "scipy-1.16.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2b71d93c8a9936046866acebc915e2af2e292b883ed6e2cbe5c34beb094b82d9"},
{file = "scipy-1.16.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3d4a07a8e785d80289dfe66b7c27d8634a773020742ec7187b85ccc4b0e7b686"},
{file = "scipy-1.16.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0553371015692a898e1aa858fed67a3576c34edefa6b7ebdb4e9dde49ce5c203"},
{file = "scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:72d1717fd3b5e6ec747327ce9bda32d5463f472c9dce9f54499e81fbd50245a1"},
{file = "scipy-1.16.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fb2472e72e24d1530debe6ae078db70fb1605350c88a3d14bc401d6306dbffe"},
{file = "scipy-1.16.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5192722cffe15f9329a3948c4b1db789fbb1f05c97899187dcf009b283aea70"},
{file = "scipy-1.16.3-cp312-cp312-win_amd64.whl", hash = "sha256:56edc65510d1331dae01ef9b658d428e33ed48b4f77b1d51caf479a0253f96dc"},
{file = "scipy-1.16.3-cp312-cp312-win_arm64.whl", hash = "sha256:a8a26c78ef223d3e30920ef759e25625a0ecdd0d60e5a8818b7513c3e5384cf2"},
{file = "scipy-1.16.3-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:d2ec56337675e61b312179a1ad124f5f570c00f920cc75e1000025451b88241c"},
{file = "scipy-1.16.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:16b8bc35a4cc24db80a0ec836a9286d0e31b2503cb2fd7ff7fb0e0374a97081d"},
{file = "scipy-1.16.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:5803c5fadd29de0cf27fa08ccbfe7a9e5d741bf63e4ab1085437266f12460ff9"},
{file = "scipy-1.16.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:b81c27fc41954319a943d43b20e07c40bdcd3ff7cf013f4fb86286faefe546c4"},
{file = "scipy-1.16.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0c3b4dd3d9b08dbce0f3440032c52e9e2ab9f96ade2d3943313dfe51a7056959"},
{file = "scipy-1.16.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7dc1360c06535ea6116a2220f760ae572db9f661aba2d88074fe30ec2aa1ff88"},
{file = "scipy-1.16.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:663b8d66a8748051c3ee9c96465fb417509315b99c71550fda2591d7dd634234"},
{file = "scipy-1.16.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eab43fae33a0c39006a88096cd7b4f4ef545ea0447d250d5ac18202d40b6611d"},
{file = "scipy-1.16.3-cp313-cp313-win_amd64.whl", hash = "sha256:062246acacbe9f8210de8e751b16fc37458213f124bef161a5a02c7a39284304"},
{file = "scipy-1.16.3-cp313-cp313-win_arm64.whl", hash = "sha256:50a3dbf286dbc7d84f176f9a1574c705f277cb6565069f88f60db9eafdbe3ee2"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:fb4b29f4cf8cc5a8d628bc8d8e26d12d7278cd1f219f22698a378c3d67db5e4b"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:8d09d72dc92742988b0e7750bddb8060b0c7079606c0d24a8cc8e9c9c11f9079"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:03192a35e661470197556de24e7cb1330d84b35b94ead65c46ad6f16f6b28f2a"},
{file = "scipy-1.16.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:57d01cb6f85e34f0946b33caa66e892aae072b64b034183f3d87c4025802a119"},
{file = "scipy-1.16.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:96491a6a54e995f00a28a3c3badfff58fd093bf26cd5fb34a2188c8c756a3a2c"},
{file = "scipy-1.16.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cd13e354df9938598af2be05822c323e97132d5e6306b83a3b4ee6724c6e522e"},
{file = "scipy-1.16.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63d3cdacb8a824a295191a723ee5e4ea7768ca5ca5f2838532d9f2e2b3ce2135"},
{file = "scipy-1.16.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e7efa2681ea410b10dde31a52b18b0154d66f2485328830e45fdf183af5aefc6"},
{file = "scipy-1.16.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2d1ae2cf0c350e7705168ff2429962a89ad90c2d49d1dd300686d8b2a5af22fc"},
{file = "scipy-1.16.3-cp313-cp313t-win_arm64.whl", hash = "sha256:0c623a54f7b79dd88ef56da19bc2873afec9673a48f3b85b18e4d402bdd29a5a"},
{file = "scipy-1.16.3-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:875555ce62743e1d54f06cdf22c1e0bc47b91130ac40fe5d783b6dfa114beeb6"},
{file = "scipy-1.16.3-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:bb61878c18a470021fb515a843dc7a76961a8daceaaaa8bad1332f1bf4b54657"},
{file = "scipy-1.16.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f2622206f5559784fa5c4b53a950c3c7c1cf3e84ca1b9c4b6c03f062f289ca26"},
{file = "scipy-1.16.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7f68154688c515cdb541a31ef8eb66d8cd1050605be9dcd74199cbd22ac739bc"},
{file = "scipy-1.16.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3c820ddb80029fe9f43d61b81d8b488d3ef8ca010d15122b152db77dc94c22"},
{file = "scipy-1.16.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d3837938ae715fc0fe3c39c0202de3a8853aff22ca66781ddc2ade7554b7e2cc"},
{file = "scipy-1.16.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aadd23f98f9cb069b3bd64ddc900c4d277778242e961751f77a8cb5c4b946fb0"},
{file = "scipy-1.16.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b7c5f1bda1354d6a19bc6af73a649f8285ca63ac6b52e64e658a5a11d4d69800"},
{file = "scipy-1.16.3-cp314-cp314-win_amd64.whl", hash = "sha256:e5d42a9472e7579e473879a1990327830493a7047506d58d73fc429b84c1d49d"},
{file = "scipy-1.16.3-cp314-cp314-win_arm64.whl", hash = "sha256:6020470b9d00245926f2d5bb93b119ca0340f0d564eb6fbaad843eaebf9d690f"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:e1d27cbcb4602680a49d787d90664fa4974063ac9d4134813332a8c53dbe667c"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:9b9c9c07b6d56a35777a1b4cc8966118fb16cfd8daf6743867d17d36cfad2d40"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:3a4c460301fb2cffb7f88528f30b3127742cff583603aa7dc964a52c463b385d"},
{file = "scipy-1.16.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:f667a4542cc8917af1db06366d3f78a5c8e83badd56409f94d1eac8d8d9133fa"},
{file = "scipy-1.16.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f379b54b77a597aa7ee5e697df0d66903e41b9c85a6dd7946159e356319158e8"},
{file = "scipy-1.16.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4aff59800a3b7f786b70bfd6ab551001cb553244988d7d6b8299cb1ea653b353"},
{file = "scipy-1.16.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:da7763f55885045036fabcebd80144b757d3db06ab0861415d1c3b7c69042146"},
{file = "scipy-1.16.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ffa6eea95283b2b8079b821dc11f50a17d0571c92b43e2b5b12764dc5f9b285d"},
{file = "scipy-1.16.3-cp314-cp314t-win_amd64.whl", hash = "sha256:d9f48cafc7ce94cf9b15c6bffdc443a81a27bf7075cf2dcd5c8b40f85d10c4e7"},
{file = "scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562"},
{file = "scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb"},
{file = "scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd"},
{file = "scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558"},
{file = "scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7"},
{file = "scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6"},
{file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042"},
{file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4"},
{file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0"},
{file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449"},
{file = "scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea"},
{file = "scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = "sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379"},
{file = "scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57"},
{file = "scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e"},
{file = "scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8"},
{file = "scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306"},
{file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742"},
{file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b"},
{file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d"},
{file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e"},
{file = "scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8"},
{file = "scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b"},
{file = "scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6"},
{file = "scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269"},
{file = "scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72"},
{file = "scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61"},
{file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6"},
{file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752"},
{file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d"},
{file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea"},
{file = "scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812"},
{file = "scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e"},
{file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07"},
{file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00"},
{file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45"},
{file = "scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209"},
{file = "scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04"},
{file = "scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0"},
{file = "scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67"},
{file = "scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a"},
{file = "scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2"},
{file = "scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467"},
{file = "scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e"},
{file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67"},
{file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73"},
{file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b"},
{file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b"},
{file = "scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061"},
{file = "scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232"},
{file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d"},
{file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba"},
{file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db"},
{file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf"},
{file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f"},
{file = "scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088"},
{file = "scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff"},
{file = "scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e"},
]
[package.dependencies]
numpy = ">=1.25.2,<2.6"
numpy = ">=1.26.4,<2.7"
[package.extras]
dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"]
doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"]
dev = ["click (<8.3.0)", "cython-lint (>=0.12.2)", "mypy (==1.10.0)", "pycodestyle", "ruff (>=0.12.0)", "spin", "types-psutil", "typing_extensions"]
doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)", "tabulate"]
test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest (>=8.0.0)", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]


@ -21,61 +21,61 @@ trio = ["trio (>=0.31.0)", "trio (>=0.32.0)"]
[[package]]
name = "av"
version = "16.0.1"
version = "16.1.0"
description = "Pythonic bindings for FFmpeg's libraries."
optional = false
python-versions = ">=3.10"
files = [
{file = "av-16.0.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:8b141aaa29a3afc96a1d467d106790782c1914628b57309eaadb8c10c299c9c0"},
{file = "av-16.0.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:4b8a08a59a5be0082af063d3f4b216e3950340121c6ea95b505a3f5f5cc8f21d"},
{file = "av-16.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:792e7fc3c08eae005ff36486983966476e553cbb55aaeb0ec99adc4909377320"},
{file = "av-16.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:4e8ef5df76d8d0ee56139789f80bb90ad1a82a7e6df6e080e2e95c06fa22aea7"},
{file = "av-16.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4f7a6985784a7464f078e419c71f5528c3e550ee5d605e7149b4a37a111eb136"},
{file = "av-16.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3f45c8d7b803b6faa2a25a26de5964a0a897de68298d9c9672c7af9d65d8b48a"},
{file = "av-16.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:58e6faf1d9328d8cc6be14c5aadacb7d2965ed6d6ae1af32696993096543ff00"},
{file = "av-16.0.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e310d1fb42879df9bad2152a8db6d2ff8bf332c8c36349a09d62cc122f5070fb"},
{file = "av-16.0.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:2f4b357e5615457a84e6b6290916b22864b76b43d5079e1a73bc27581a5b9bac"},
{file = "av-16.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:286665c77034c3a98080169b8b5586d5568a15da81fbcdaf8099252f2d232d7c"},
{file = "av-16.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f88de8e5b8ea29e41af4d8d61df108323d050ccfbc90f15b13ec1f99ce0e841e"},
{file = "av-16.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0cdb71ebe4d1b241cf700f8f0c44a7d2a6602b921e16547dd68c0842113736e1"},
{file = "av-16.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28c27a65d40e8cf82b6db2543f8feeb8b56d36c1938f50773494cd3b073c7223"},
{file = "av-16.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ffea39ac7574f234f5168f9b9602e8d4ecdd81853238ec4d661001f03a6d3f64"},
{file = "av-16.0.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:e41a8fef85dfb2c717349f9ff74f92f9560122a9f1a94b1c6c9a8a9c9462ba71"},
{file = "av-16.0.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:6352a64b25c9f985d4f279c2902db9a92424e6f2c972161e67119616f0796cb9"},
{file = "av-16.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5201f7b4b5ed2128118cb90c2a6d64feedb0586ca7c783176896c78ffb4bbd5c"},
{file = "av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:daecc2072b82b6a942acbdaa9a2e00c05234c61fef976b22713983c020b07992"},
{file = "av-16.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6573da96e8bebc3536860a7def108d7dbe1875c86517072431ced702447e6aea"},
{file = "av-16.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4bc064e48a8de6c087b97dd27cf4ef8c13073f0793108fbce3ecd721201b2502"},
{file = "av-16.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0c669b6b6668c8ae74451c15ec6d6d8a36e4c3803dc5d9910f607a174dd18f17"},
{file = "av-16.0.1-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:4c61c6c120f5c5d95c711caf54e2c4a9fb2f1e613ac0a9c273d895f6b2602e44"},
{file = "av-16.0.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ecc2e41320c69095f44aff93470a0d32c30892b2dbad0a08040441c81efa379"},
{file = "av-16.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:036f0554d6faef3f4a94acaeb0cedd388e3ab96eb0eb5a14ec27c17369c466c9"},
{file = "av-16.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:876415470a62e4a3550cc38db2fc0094c25e64eea34d7293b7454125d5958190"},
{file = "av-16.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:56902a06bd0828d13f13352874c370670882048267191ff5829534b611ba3956"},
{file = "av-16.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe988c2bf0fc2d952858f791f18377ea4ae4e19ba3504793799cd6c2a2562edf"},
{file = "av-16.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:708a66c248848029bf518f0482b81c5803846f1b597ef8013b19c014470b620f"},
{file = "av-16.0.1-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:79a77ee452537030c21a0b41139bedaf16629636bf764b634e93b99c9d5f4558"},
{file = "av-16.0.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:080823a6ff712f81e7089ae9756fb1512ca1742a138556a852ce50f58e457213"},
{file = "av-16.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:04e00124afa8b46a850ed48951ddda61de874407fb8307d6a875bba659d5727e"},
{file = "av-16.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:bc098c1c6dc4e7080629a7e9560e67bd4b5654951e17e5ddfd2b1515cfcd37db"},
{file = "av-16.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ffd3559a72c46a76aa622630751a821499ba5a780b0047ecc75105d43a6b61"},
{file = "av-16.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7a3f1a36b550adadd7513f4f5ee956f9e06b01a88e59f3150ef5fec6879d6f79"},
{file = "av-16.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c6de794abe52b8c0be55d8bb09ade05905efa74b1a5ab4860b4b9c2bfb6578bf"},
{file = "av-16.0.1-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:4b55ba69a943ae592ad7900da67129422954789de9dc384685d6b529925f542e"},
{file = "av-16.0.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d4a0c47b6c9bbadad8909b82847f5fe64a608ad392f0b01704e427349bcd9a47"},
{file = "av-16.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:8bba52f3035708456f6b1994d10b0371b45cfd8f917b5e84ff81aef4ec2f08bf"},
{file = "av-16.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:08e34c7e7b5e55e29931180bbe21095e1874ac120992bf6b8615d39574487617"},
{file = "av-16.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0d6250ab9db80c641b299987027c987f14935ea837ea4c02c5f5182f6b69d9e5"},
{file = "av-16.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7b621f28d8bcbb07cdcd7b18943ddc040739ad304545715ae733873b6e1b739d"},
{file = "av-16.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:92101f49082392580c9dba4ba2fe5b931b3bb0fb75a1a848bfb9a11ded68be91"},
{file = "av-16.0.1-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:07c464bf2bc362a154eccc82e235ef64fd3aaf8d76fc8ed63d0ae520943c6d3f"},
{file = "av-16.0.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:750da0673864b669c95882c7b25768cd93ece0e47010d74ebcc29dbb14d611f8"},
{file = "av-16.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0b7c0d060863b2e341d07cd26851cb9057b7979814148b028fb7ee5d5eb8772d"},
{file = "av-16.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:e67c2eca6023ca7d76b0709c5f392b23a5defba499f4c262411f8155b1482cbd"},
{file = "av-16.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3243d54d84986e8fbdc1946db634b0c41fe69b6de35a99fa8b763e18503d040"},
{file = "av-16.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bcf73efab5379601e6510abd7afe5f397d0f6defe69b1610c2f37a4a17996b"},
{file = "av-16.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6368d4ff153d75469d2a3217bc403630dc870a72fe0a014d9135de550d731a86"},
{file = "av-16.0.1.tar.gz", hash = "sha256:dd2ce779fa0b5f5889a6d9e00fbbbc39f58e247e52d31044272648fe16ff1dbf"},
{file = "av-16.1.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:2395748b0c34fe3a150a1721e4f3d4487b939520991b13e7b36f8926b3b12295"},
{file = "av-16.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:72d7ac832710a158eeb7a93242370aa024a7646516291c562ee7f14a7ea881fd"},
{file = "av-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6cbac833092e66b6b0ac4d81ab077970b8ca874951e9c3974d41d922aaa653ed"},
{file = "av-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:eb990672d97c18f99c02f31c8d5750236f770ffe354b5a52c5f4d16c5e65f619"},
{file = "av-16.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05ad70933ac3b8ef896a820ea64b33b6cca91a5fac5259cb9ba7fa010435be15"},
{file = "av-16.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d831a1062a3c47520bf99de6ec682bd1d64a40dfa958e5457bb613c5270e7ce3"},
{file = "av-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:358ab910fef3c5a806c55176f2b27e5663b33c4d0a692dafeb049c6ed71f8aff"},
{file = "av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f"},
{file = "av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b"},
{file = "av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879"},
{file = "av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e"},
{file = "av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83"},
{file = "av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63"},
{file = "av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62"},
{file = "av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6"},
{file = "av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35"},
{file = "av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86"},
{file = "av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2"},
{file = "av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a"},
{file = "av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829"},
{file = "av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd"},
{file = "av-16.1.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:ce2a1b3d8bf619f6c47a9f28cfa7518ff75ddd516c234a4ee351037b05e6a587"},
{file = "av-16.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:408dbe6a2573ca58a855eb8cd854112b33ea598651902c36709f5f84c991ed8e"},
{file = "av-16.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:57f657f86652a160a8a01887aaab82282f9e629abf94c780bbdbb01595d6f0f7"},
{file = "av-16.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:adbad2b355c2ee4552cac59762809d791bda90586d134a33c6f13727fb86cb3a"},
{file = "av-16.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f42e1a68ec2aebd21f7eb6895be69efa6aa27eec1670536876399725bbda4b99"},
{file = "av-16.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58fe47aeaef0f100c40ec8a5de9abbd37f118d3ca03829a1009cf288e9aef67c"},
{file = "av-16.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:565093ebc93b2f4b76782589564869dadfa83af5b852edebedd8fee746457d06"},
{file = "av-16.1.0-cp313-cp313t-macosx_11_0_x86_64.whl", hash = "sha256:574081a24edb98343fd9f473e21ae155bf61443d4ec9d7708987fa597d6b04b2"},
{file = "av-16.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:9ab00ea29c25ebf2ea1d1e928d7babb3532d562481c5d96c0829212b70756ad0"},
{file = "av-16.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a84a91188c1071f238a9523fd42dbe567fb2e2607b22b779851b2ce0eac1b560"},
{file = "av-16.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c2cd0de4dd022a7225ff224fde8e7971496d700be41c50adaaa26c07bb50bf97"},
{file = "av-16.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0816143530624a5a93bc5494f8c6eeaf77549b9366709c2ac8566c1e9bff6df5"},
{file = "av-16.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e3a28053af29644696d0c007e897d19b1197585834660a54773e12a40b16974c"},
{file = "av-16.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e3e67144a202b95ed299d165232533989390a9ea3119d37eccec697dc6dbb0c"},
{file = "av-16.1.0-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:39a634d8e5a87e78ea80772774bfd20c0721f0d633837ff185f36c9d14ffede4"},
{file = "av-16.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0ba32fb9e9300948a7fa9f8a3fc686e6f7f77599a665c71eb2118fdfd2c743f9"},
{file = "av-16.1.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:ca04d17815182d34ce3edc53cbda78a4f36e956c0fd73e3bab249872a831c4d7"},
{file = "av-16.1.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ee0e8de2e124a9ef53c955fe2add6ee7c56cc8fd83318265549e44057db77142"},
{file = "av-16.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:22bf77a2f658827043a1e184b479c3bf25c4c43ab32353677df2d119f080e28f"},
{file = "av-16.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2dd419d262e6a71cab206d80bbf28e0a10d0f227b671cdf5e854c028faa2d043"},
{file = "av-16.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:53585986fd431cd436f290fba662cfb44d9494fbc2949a183de00acc5b33fa88"},
{file = "av-16.1.0-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:76f5ed8495cf41e1209a5775d3699dc63fdc1740b94a095e2485f13586593205"},
{file = "av-16.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8d55397190f12a1a3ae7538be58c356cceb2bf50df1b33523817587748ce89e5"},
{file = "av-16.1.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9d51d9037437218261b4bbf9df78a95e216f83d7774fbfe8d289230b5b2e28e2"},
{file = "av-16.1.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0ce07a89c15644407f49d942111ca046e323bbab0a9078ff43ee57c9b4a50dad"},
{file = "av-16.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cac0c074892ea97113b53556ff41c99562db7b9f09f098adac1f08318c2acad5"},
{file = "av-16.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7dec3dcbc35a187ce450f65a2e0dda820d5a9e6553eea8344a1459af11c98649"},
{file = "av-16.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6f90dc082ff2068ddbe77618400b44d698d25d9c4edac57459e250c16b33d700"},
{file = "av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd"},
]
[[package]]
@ -150,24 +150,24 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -178,7 +178,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -195,7 +195,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]
@ -290,13 +290,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1250,4 +1250,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "c192429330d74a80d01bb76235d36a9d5ae31fff1b4ad57a0a22f7439a6843e0"
content-hash = "852386737336a4091f1dffd31b545734085aec2c93c195e6f6b182047584c60d"

View File

@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
decord = "^0.6.0"
timm = "^1.0.24"
av = "^16.0.1"
av = "^16.1.0"
[build-system]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -218,24 +218,24 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -246,7 +246,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -263,7 +263,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -166,15 +166,15 @@ test-tox-coverage = ["coverage (>=5.5)"]
[[package]]
name = "bitsandbytes"
version = "0.49.0"
version = "0.49.1"
description = "k-bit optimizers and matrix multiplication routines."
optional = false
python-versions = ">=3.10"
files = [
{file = "bitsandbytes-0.49.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:17d5b57e6d51b78bcfc07da0e93db061181b25bffabfafe101dd9b75c2710872"},
{file = "bitsandbytes-0.49.0-py3-none-manylinux_2_24_aarch64.whl", hash = "sha256:7e69951b4d207a676986fce967544d9599f23518d0f09d478295996aeff377c2"},
{file = "bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:0c46cdef50b3174463b6bdf13715c9f1f00b360be3626e3c5d2f8d226af2cf3f"},
{file = "bitsandbytes-0.49.0-py3-none-win_amd64.whl", hash = "sha256:57a327c6d65f7eda32eb8d416ef8e44d2415c2e7b4fdb735896abd04171ae696"},
{file = "bitsandbytes-0.49.1-py3-none-macosx_14_0_arm64.whl", hash = "sha256:9de01d4384b6c71ef9ab052b98457dc0e4fff8fe06ab14833b5b712700deb005"},
{file = "bitsandbytes-0.49.1-py3-none-manylinux_2_24_aarch64.whl", hash = "sha256:acd4730a0db3762d286707f4a3bc1d013d21dd5f0e441900da57ec4198578d4e"},
{file = "bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:e7940bf32457dc2e553685285b2a86e82f5ec10b2ae39776c408714f9ae6983c"},
{file = "bitsandbytes-0.49.1-py3-none-win_amd64.whl", hash = "sha256:6ead0763f4beb936f9a09acb49ec094a259180906fc0605d9ca0617249c3c798"},
]
[package.dependencies]
@ -590,24 +590,24 @@ pytest = ["pytest (>=7)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -618,7 +618,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -635,7 +635,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]
@ -1327,21 +1327,21 @@ ssh = ["paramiko"]
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2350,4 +2350,4 @@ dev = ["pytest", "setuptools"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "1dd6c5ec9bea98c49f5a6401d4af6e4406a1940a1c91823b730838f9bdf18ac9"
content-hash = "6f8d1977f60d92124fb949b55a658c7e8e176458dc9e2a29ee1d7c52dec7ab26"

View File

@ -13,7 +13,7 @@ rotary-embedding-torch = "0.5.3"
fabric = "^3.2.2"
contexttimer = "^0.3.3"
ray = "^2.53.0"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
bitsandbytes = ">=0.39.0"
rpyc = "6.0.0"
galore-torch = "^1.0"

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -522,13 +522,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -872,13 +872,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1796,21 +1796,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]
@ -1343,21 +1343,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -2234,4 +2234,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "072741ab4fb1cf908a6e193f60f71e7205d9e9d3c74839bc9a0d153046c675cf"
content-hash = "afb9d901c2b136e6d39517fb48361cf0e030ee48cdfb15f5486ac8d83e8f48ca"

View File

@ -9,7 +9,7 @@ readme = "README.md"
python = ">=3.10,<3.13"
datasets = "3.1.0"
evaluate = "^0.4.6"
protobuf = "^6.33.2"
protobuf = "^6.33.3"
rouge-score = "^0.1.2"
sentencepiece = "^0.2.1"
tiktoken = "^0.12.0"

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

@ -453,13 +453,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

@ -176,24 +176,24 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -204,7 +204,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -221,7 +221,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -168,24 +168,24 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -196,7 +196,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -213,7 +213,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -500,13 +500,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -782,13 +782,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

@ -688,13 +688,13 @@ psutil = ["psutil (>=5.8.0)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1917,13 +1917,13 @@ files = [
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]
@ -2927,30 +2927,30 @@ six = ">=1.14.0"
[[package]]
name = "ruff"
version = "0.14.10"
version = "0.14.11"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
files = [
{file = "ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49"},
{file = "ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f"},
{file = "ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d"},
{file = "ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f"},
{file = "ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60"},
{file = "ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830"},
{file = "ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6"},
{file = "ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154"},
{file = "ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6"},
{file = "ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4"},
{file = "ruff-0.14.11-py3-none-linux_armv6l.whl", hash = "sha256:f6ff2d95cbd335841a7217bdfd9c1d2e44eac2c584197ab1385579d55ff8830e"},
{file = "ruff-0.14.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f6eb5c1c8033680f4172ea9c8d3706c156223010b8b97b05e82c59bdc774ee6"},
{file = "ruff-0.14.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2fc34cc896f90080fca01259f96c566f74069a04b25b6205d55379d12a6855e"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53386375001773ae812b43205d6064dae49ff0968774e6befe16a994fc233caa"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a697737dce1ca97a0a55b5ff0434ee7205943d4874d638fe3ae66166ff46edbe"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6845ca1da8ab81ab1dce755a32ad13f1db72e7fba27c486d5d90d65e04d17b8f"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e36ce2fd31b54065ec6f76cb08d60159e1b32bdf08507862e32f47e6dde8bcbf"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590bcc0e2097ecf74e62a5c10a6b71f008ad82eb97b0a0079e85defe19fe74d9"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53fe71125fc158210d57fe4da26e622c9c294022988d08d9347ec1cf782adafe"},
{file = "ruff-0.14.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a35c9da08562f1598ded8470fcfef2afb5cf881996e6c0a502ceb61f4bc9c8a3"},
{file = "ruff-0.14.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0f3727189a52179393ecf92ec7057c2210203e6af2676f08d92140d3e1ee72c1"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:eb09f849bd37147a789b85995ff734a6c4a095bed5fd1608c4f56afc3634cde2"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c61782543c1231bf71041461c1f28c64b961d457d0f238ac388e2ab173d7ecb7"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82ff352ea68fb6766140381748e1f67f83c39860b6446966cff48a315c3e2491"},
{file = "ruff-0.14.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:728e56879df4ca5b62a9dde2dd0eb0edda2a55160c0ea28c4025f18c03f86984"},
{file = "ruff-0.14.11-py3-none-win32.whl", hash = "sha256:337c5dd11f16ee52ae217757d9b82a26400be7efac883e9e852646f1557ed841"},
{file = "ruff-0.14.11-py3-none-win_amd64.whl", hash = "sha256:f981cea63d08456b2c070e64b79cb62f951aa1305282974d4d5216e6e0178ae6"},
{file = "ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0"},
{file = "ruff-0.14.11.tar.gz", hash = "sha256:f6dc463bfa5c07a59b1ff2c3b9767373e541346ea105503b4c0369c520a66958"},
]
[[package]]
@ -3863,4 +3863,4 @@ propcache = ">=0.2.1"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "15be197852e2bbd2af31018599c8327bb9e7c547e9cb06b805f17c94d126948c"
content-hash = "851a9b014df9acf65531d771edf4c10d381d0705c4b7bfc59102ea3c3d9ae963"

View File

@ -19,7 +19,7 @@ gradio = "4.44.1"
mdtex2html = "^1.3.2"
sse-starlette = "^3.1.2"
aiohttp-sse-client = "^0.2.1"
openai = "^2.14.0"
openai = "^2.15.0"
[build-system]

View File

@ -464,13 +464,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

View File

@ -591,13 +591,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -945,13 +945,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -510,13 +510,13 @@ torch = ["torch"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1681,21 +1681,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]

View File

@ -516,13 +516,13 @@ profile = ["gprof2dot (>=2022.7.29)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -454,13 +454,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -736,13 +736,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -400,13 +400,13 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -550,13 +550,13 @@ files = [
[[package]]
name = "google-api-core"
version = "2.28.1"
version = "2.29.0"
description = "Google API client core library"
optional = false
python-versions = ">=3.7"
files = [
{file = "google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c"},
{file = "google_api_core-2.28.1.tar.gz", hash = "sha256:2b405df02d68e68ce0fbc138559e6036559e685159d148ae5861013dc201baf8"},
{file = "google_api_core-2.29.0-py3-none-any.whl", hash = "sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9"},
{file = "google_api_core-2.29.0.tar.gz", hash = "sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7"},
]
[package.dependencies]
@ -1286,21 +1286,21 @@ testing = ["google-api-core (>=1.31.5)"]
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -1908,13 +1908,13 @@ zstd = ["backports-zstd (>=1.0.0)"]
[[package]]
name = "virtualenv"
version = "20.36.0"
version = "20.36.1"
description = "Virtual Python Environment builder"
optional = false
python-versions = ">=3.8"
files = [
{file = "virtualenv-20.36.0-py3-none-any.whl", hash = "sha256:e7ded577f3af534fd0886d4ca03277f5542053bedb98a70a989d3c22cfa5c9ac"},
{file = "virtualenv-20.36.0.tar.gz", hash = "sha256:a3601f540b515a7983508113f14e78993841adc3d83710fa70f0ac50f43b23ed"},
{file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"},
{file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"},
]
[package.dependencies]

View File

@ -489,13 +489,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -771,13 +771,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "huggingface-hub"
version = "1.2.4"
version = "1.3.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.9.0"
files = [
{file = "huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9"},
{file = "huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5"},
{file = "huggingface_hub-1.3.1-py3-none-any.whl", hash = "sha256:efbc7f3153cb84e2bb69b62ed90985e21ecc9343d15647a419fc0ee4b85f0ac3"},
{file = "huggingface_hub-1.3.1.tar.gz", hash = "sha256:e80e0cfb4a75557c51ab20d575bdea6bb6106c2f97b7c75d8490642f1efb6df5"},
]
[package.dependencies]

View File

@ -524,13 +524,13 @@ dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -674,13 +674,13 @@ files = [
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -691,7 +691,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -708,7 +708,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

View File

@ -552,13 +552,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -1972,13 +1972,13 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]

View File

@ -1,4 +1,4 @@
{
"commit_hash": "b85c447ceb1ff91c5d4df6b71de2256a5fabfe9d",
"timestamp": "2026-01-08T02:42:38Z"
"commit_hash": "c0e25e54181528c8e0818e2e9bc22fe5a889b8cc",
"timestamp": "2026-01-12T02:39:25Z"
}

View File

@ -435,19 +435,19 @@ urllib3 = ">=1.25.3,<3"
[[package]]
name = "build"
version = "1.3.0"
version = "1.4.0"
description = "A simple, correct Python build frontend"
optional = false
python-versions = ">=3.9"
files = [
{file = "build-1.3.0-py3-none-any.whl", hash = "sha256:7145f0b5061ba90a1500d60bd1b13ca0a8a4cebdd0cc16ed8adf1c0e739f43b4"},
{file = "build-1.3.0.tar.gz", hash = "sha256:698edd0ea270bde950f53aed21f3a0135672206f3911e0176261a31e0e07b397"},
{file = "build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596"},
{file = "build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936"},
]
[package.dependencies]
colorama = {version = "*", markers = "os_name == \"nt\""}
importlib-metadata = {version = ">=4.6", markers = "python_full_version < \"3.10.2\""}
packaging = ">=19.1"
packaging = ">=24.0"
pyproject_hooks = "*"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
@ -1144,13 +1144,13 @@ standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[stand
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -3462,13 +3462,13 @@ onnx = ">=1.14.0"
[[package]]
name = "openai"
version = "2.14.0"
version = "2.15.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.9"
files = [
{file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"},
{file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"},
{file = "openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3"},
{file = "openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba"},
]
[package.dependencies]
@ -3715,13 +3715,13 @@ test = ["importlib_metadata (>=2.0)", "pytest (>=6.0)"]
[[package]]
name = "peft"
version = "0.18.0"
version = "0.18.1"
description = "Parameter-Efficient Fine-Tuning (PEFT)"
optional = false
python-versions = ">=3.10.0"
files = [
{file = "peft-0.18.0-py3-none-any.whl", hash = "sha256:624f69ca6393b765ccc6734adda7ca57d80b238f0900a42c357d8b67a03d62ff"},
{file = "peft-0.18.0.tar.gz", hash = "sha256:c81c80b2056ab40c23d58ef25f74daab417ac653970718589a11a8af28218588"},
{file = "peft-0.18.1-py3-none-any.whl", hash = "sha256:0bf06847a3551e3019fc58c440cffc9a6b73e6e2962c95b52e224f77bbdb50f1"},
{file = "peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2"},
]
[package.dependencies]
@ -4045,21 +4045,21 @@ files = [
[[package]]
name = "protobuf"
version = "6.33.2"
version = "6.33.3"
description = ""
optional = false
python-versions = ">=3.9"
files = [
{file = "protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d"},
{file = "protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4"},
{file = "protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872"},
{file = "protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f"},
{file = "protobuf-6.33.2-cp39-cp39-win32.whl", hash = "sha256:7109dcc38a680d033ffb8bf896727423528db9163be1b6a02d6a49606dcadbfe"},
{file = "protobuf-6.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:2981c58f582f44b6b13173e12bb8656711189c2a70250845f264b877f00b1913"},
{file = "protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c"},
{file = "protobuf-6.33.2.tar.gz", hash = "sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4"},
{file = "protobuf-6.33.3-cp310-abi3-win32.whl", hash = "sha256:b4046f9f2ede57ad5b1d9917baafcbcad42f8151a73c755a1e2ec9557b0a764f"},
{file = "protobuf-6.33.3-cp310-abi3-win_amd64.whl", hash = "sha256:1fd18f030ae9df97712fbbb0849b6e54c63e3edd9b88d8c3bb4771f84d8db7a4"},
{file = "protobuf-6.33.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:648b7b0144222eb06cf529a3d7b01333c5f30b4196773b682d388f04db373759"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:08a6ca12f60ba99097dd3625ef4275280f99c9037990e47ce9368826b159b890"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:642fce7187526c98683c79a3ad68e5d646a5ef5eb004582fe123fc9a33a9456b"},
{file = "protobuf-6.33.3-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:6fa9b5f4baa12257542273e5e6f3c3d3867b30bc2770c14ad9ac8315264bf986"},
{file = "protobuf-6.33.3-cp39-cp39-win32.whl", hash = "sha256:c46dcc47b243b299f4f7eabeed21929c07f0d36fffe2ea8431793b53c308ab80"},
{file = "protobuf-6.33.3-cp39-cp39-win_amd64.whl", hash = "sha256:2756963dcfd414eba46bcbb341f0e2c652036e5d700f112b3bb90fa1a031893a"},
{file = "protobuf-6.33.3-py3-none-any.whl", hash = "sha256:c2bf221076b0d463551efa2e1319f08d4cffcc5f0d864614ccd3d0e77a637794"},
{file = "protobuf-6.33.3.tar.gz", hash = "sha256:c8794debeb402963fddff41a595e1f649bcd76616ba56c835645cab4539e810e"},
]
[[package]]
@ -5540,53 +5540,58 @@ testing = ["datasets", "numpy", "pytest", "pytest-asyncio", "requests", "ruff",
[[package]]
name = "tomli"
version = "2.3.0"
version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
files = [
{file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"},
{file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"},
{file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"},
{file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"},
{file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"},
{file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"},
{file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"},
{file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"},
{file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"},
{file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"},
{file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"},
{file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"},
{file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"},
{file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"},
{file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"},
{file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"},
{file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"},
{file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"},
{file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"},
{file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"},
{file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"},
{file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"},
{file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"},
{file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"},
{file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"},
{file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"},
{file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"},
{file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"},
{file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"},
{file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"},
{file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
{file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
{file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
{file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
{file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
{file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
{file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
{file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
{file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
{file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
{file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
{file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
{file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
{file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
{file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
{file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
{file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
{file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
{file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
{file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
{file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
{file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
{file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
{file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
{file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
{file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
{file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
{file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
{file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
{file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
{file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
{file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
{file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
{file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
{file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
{file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
[[package]]
@ -6339,4 +6344,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "dab9694d64d1c91b512eb62bbd31da9d0cdb8c93e99941a7022f2f46aea905e3"
content-hash = "f17eedd404a2af6728d14710809ea47ad34bc6672c035073bad9e6c709131a08"

View File

@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.10,<3.13"
accelerate = ">=1.7.0"
build = "^1.3.0"
build = "^1.4.0"
colored = "^2.3.1"
cuda-python = ">=13"
diffusers = ">=0.27.0"
@ -18,7 +18,7 @@ mpi4py = "^4.1.1"
numpy = "<2"
onnx = ">=1.18.0,<1.20.0"
onnx-graphsurgeon = ">=0.5.2"
openai = "^2.14.0"
openai = "^2.15.0"
polygraphy = "^0.49.26"
psutil = "^7.2.1"
nvidia-ml-py = ">=13"
@ -53,7 +53,7 @@ starlette = ">=0.49.1"
uvicorn = "^0.40.0"
setuptools = "<80"
ordered-set = "^4.1.0"
peft = "^0.18.0"
peft = "^0.18.1"
patchelf = "^0.17.2.4"
einops = "^0.8.1"
flashinfer-python = ">=0.3.0,<0.4.0"

View File

@ -146,13 +146,13 @@ files = [
[[package]]
name = "filelock"
version = "3.20.2"
version = "3.20.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
files = [
{file = "filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8"},
{file = "filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64"},
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
@ -174,13 +174,13 @@ test = ["hypothesis (<6.136.0)", "levenshtein (<=0.27.1)", "pip", "pylint (<3.3.
[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2026.1.0"
description = "File-system specification"
optional = false
python-versions = ">=3.10"
files = [
{file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
{file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
]
[package.extras]
@ -191,7 +191,7 @@ dask = ["dask", "distributed"]
dev = ["pre-commit", "ruff (>=0.5)"]
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
@ -208,7 +208,7 @@ smb = ["smbprotocol"]
ssh = ["paramiko"]
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
tqdm = ["tqdm"]
[[package]]

View File

@ -128,7 +128,6 @@ else:
    ]
    package_data += [
        'bindings.pyi',
        'bindings/*.pyi',
        'tools/plugin_gen/templates/*',
        'bench/build/benchmark_config.yml',

View File

@ -17,7 +17,7 @@ from tensorrt_llm._torch.modules.multi_stream_utils import \
    maybe_execute_in_parallel
from tensorrt_llm._torch.modules.rotary_embedding import RotaryEmbedding
from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager
from tensorrt_llm._torch.utils import maybe_compile
from tensorrt_llm._torch.utils import maybe_compile, maybe_compiled_cat
from tensorrt_llm._utils import get_size_in_bytes, get_sm_version
from tensorrt_llm.bindings import DataType
from tensorrt_llm.bindings.executor import KvCacheConfig
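The import change above brings in maybe_compiled_cat, which a hunk further down substitutes for a plain torch.cat in _prep_q_or_k. A minimal sketch of how such a helper could be structured, assuming it merely routes the concat through torch.compile when a compiler backend is available; the actual tensorrt_llm._torch.utils implementation is not shown in this diff and may differ:

import torch

def _eager_cat(tensors, dim):
    # Plain eager concatenation, used as the fallback path.
    return torch.cat(tensors, dim=dim)

try:
    # Wrap once at import time; torch.compile specializes lazily on first call.
    _cat_impl = torch.compile(_eager_cat)
except Exception:
    # Fall back when no compiler backend is available on this platform.
    _cat_impl = _eager_cat

def maybe_compiled_cat_sketch(tensors, dim=-1):
    """Concatenate tensors, using the compiled path when compilation succeeded."""
    return _cat_impl(tensors, dim)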
@ -1047,12 +1047,11 @@ class Indexer(nn.Module):
        # Indexer should just process the current MLA chunk as a single chunk
        has_mla_chunked_prefill = (
            metadata.enable_context_mla_with_cached_kv
            and host_cached_tokens.sum().item() > 0
            and metadata.runtime_features.chunked_prefill)
        if has_mla_chunked_prefill:
            # The MLA has already split the sequence, here just process what's given (as a single chunk)
            # Cached token info is derived from metadata.host_ctx_cached_token_indptr in prepare_one_prefill_chunk
            # MLA chunked prefill is active - use single-chunk pattern for
            # indexer prefill chunks.
            chunk_specs = [(i, 0, host_seq_lens[i].item(),
                            host_seq_lens[:i].sum().item() if i > 0 else 0)
                           for i in range(num_contexts)]
@ -1063,7 +1062,8 @@ class Indexer(nn.Module):
                )
            ]
        else:
            # Normal mode: use indexer's own chunking logic to prevent L^2 complexity when long-sequence is used.
            # Use indexer's own chunking logic to prevent L^2 complexity of indexer MQA logits computation for long sequences.
            # This is only used when MLA chunked prefill is not enabled.
            chunk_groups = split_prefill_chunks(
                host_seq_lens,
                metadata.indexer_max_chunk_size,
@ -1541,7 +1541,7 @@ class Indexer(nn.Module):
    def _prep_q_or_k(self, qk_pe: torch.Tensor, qk_nope: torch.Tensor):
        """Concatenate, rotate, and FP8 quantize for Q or K"""
        q_or_k = torch.cat([qk_pe, qk_nope], dim=-1)
        q_or_k = maybe_compiled_cat([qk_pe, qk_nope], dim=-1)
        q_or_k = rotate_activation(q_or_k)
        q_or_k = q_or_k.view(-1, self.head_dim)
        q_or_k = fp8_utils.fp8_quantize_1x128_sf_transpose(

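Both the old and new comments in the hunks above motivate the same design: computing the indexer's MQA logits over an unchunked length-L prefill costs O(L^2), so long sequences are split into bounded chunks, and only the MLA-chunked-prefill path skips that splitting. A small, self-contained sketch of the splitting idea; the function name and the (seq_idx, chunk_start, chunk_len) layout are illustrative assumptions, not the actual split_prefill_chunks signature:

from typing import List, Tuple

def split_into_chunks(seq_lens: List[int],
                      max_chunk_size: int) -> List[Tuple[int, int, int]]:
    """Return (seq_idx, chunk_start, chunk_len) triples, each chunk at most
    max_chunk_size tokens long, so per-chunk logits cost stays bounded."""
    chunks = []
    for seq_idx, seq_len in enumerate(seq_lens):
        start = 0
        while start < seq_len:
            chunk_len = min(max_chunk_size, seq_len - start)
            chunks.append((seq_idx, start, chunk_len))
            start += chunk_len
    return chunks

if __name__ == "__main__":
    # Two prefill sequences of 5 and 3 tokens, chunked at 2 tokens each.
    print(split_into_chunks([5, 3], max_chunk_size=2))
    # [(0, 0, 2), (0, 2, 2), (0, 4, 1), (1, 0, 2), (1, 2, 1)]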
View File

@ -5,6 +5,7 @@ from .modeling_bert import BertForSequenceClassification
from .modeling_clip import CLIPVisionModel
from .modeling_deepseekv3 import DeepseekV3ForCausalLM
from .modeling_exaone4 import Exaone4ForCausalLM
from .modeling_exaone_moe import ExaoneMoeForCausalLM
from .modeling_gemma3 import Gemma3ForCausalLM
from .modeling_gemma3vl import Gemma3VLM
from .modeling_glm import Glm4MoeForCausalLM
@ -44,6 +45,7 @@ __all__ = [
"CLIPVisionModel",
"DeepseekV3ForCausalLM",
"Exaone4ForCausalLM",
"ExaoneMoeForCausalLM",
"Gemma3ForCausalLM",
"Gemma3VLM",
"HCXVisionForCausalLM",

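The new ExaoneMoeForCausalLM export above is backed by the modeling_exaone_moe file below, which imports register_auto_model from modeling_utils. Assuming that decorator follows the usual auto-model registry pattern of keying classes by the architecture name found in the HF config, a minimal sketch looks like this (the real tensorrt_llm registry may differ):

# Hypothetical registry sketch; names here are stand-ins, not the real code.
_MODEL_REGISTRY = {}

def register_auto_model(cls):
    # Key by class name, matching the "architectures" field of an HF config.
    _MODEL_REGISTRY[cls.__name__] = cls
    return cls

@register_auto_model
class ExaoneMoeForCausalLM:  # stand-in for the real model class
    pass

# Resolving an architecture string back to its implementation class:
assert _MODEL_REGISTRY["ExaoneMoeForCausalLM"] is ExaoneMoeForCausalLM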
View File

@ -0,0 +1,581 @@
import math
import os
import re
from typing import Dict, List, Optional, Tuple
import torch
from torch import nn
from tensorrt_llm._ipc_utils import can_access_peer
from tensorrt_llm._torch.modules.qk_norm_attention import QKNormRoPEAttention
from tensorrt_llm.functional import PositionEmbeddingType
from tensorrt_llm.mapping import Mapping
from tensorrt_llm.models.modeling_utils import QuantConfig
from tensorrt_llm.quantization import QuantAlgo
from ...logger import logger
from ..attention_backend import AttentionMetadata
from ..attention_backend.interface import (
PositionalEmbeddingParams,
PredefinedAttentionMask,
RopeParams,
)
from ..distributed import (
AllReduce,
AllReduceFusionOp,
AllReduceParams,
MoEAllReduce,
MoEAllReduceParams,
)
from ..model_config import ModelConfig
from ..models.modeling_deepseekv3 import Deepseekv3MoE
from ..modules.decoder_layer import DecoderLayer
from ..modules.embedding import Embedding
from ..modules.gated_mlp import GatedMLP
from ..modules.linear import TensorParallelMode
from ..modules.rms_norm import RMSNorm
from ..utils import AuxStreamType, Fp4QuantizedTensor
from .modeling_utils import (
DecoderModel,
DecoderModelForCausalLM,
EagerFusionConfig,
register_auto_model,
)
# fmt: off
# TODO: Remove this once we have a proper transformers package
from transformers import AutoConfig, PretrainedConfig # isort: skip
class ExaoneMoEConfig(PretrainedConfig):
model_type = "exaone_moe"
logger.warning_once(
"transformers does not support 'ExaoneMoEConfig'. "
"Register ExaoneMoEConfig to mimic the ExaoneMoE model.",
key="EXAONE_MOE_REGISTER_WARNING"
)
AutoConfig.register(ExaoneMoEConfig.model_type, ExaoneMoEConfig)
# End of the config register.
# fmt: on
def check_is_moe(config: ExaoneMoEConfig, layer_idx: int) -> bool:
"""
Check if the current layer is a MoE layer.
"""
return hasattr(config, "is_moe_layer") and config.is_moe_layer[layer_idx]
def enable_attn_allreduce(mapping: Mapping):
return not mapping.enable_attention_dp or mapping.has_tp()
class ExaoneMoeAttention(QKNormRoPEAttention):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
layer_idx: Optional[int] = None,
fuse_qk_norm_rope: bool = False,
disable_deep_gemm: bool = False,
):
config = model_config.pretrained_config
self.attention_window_size = None
self.is_sliding = config.layer_types[layer_idx] == "sliding_attention"
# NOTE: In ExaoneMoe, only sliding layers apply rope.
pos_embd_params = None
if self.is_sliding:
self.attention_window_size = config.sliding_window
pos_embd_params = PositionalEmbeddingParams(
type=PositionEmbeddingType.rope_gpt_neox,
rope=RopeParams.from_config(config),
)
fuse_qk_norm_rope = self.is_sliding and fuse_qk_norm_rope
# NOTE: Fusing qk norm with rope currently has an issue that slightly hurts accuracy.
assert not fuse_qk_norm_rope, "Fusing qk norm and rope currently has an accuracy issue"
super().__init__(
hidden_size=config.hidden_size,
num_attention_heads=config.num_attention_heads,
num_key_value_heads=config.num_key_value_heads,
max_position_embeddings=config.max_position_embeddings,
bias=False,
pos_embd_params=pos_embd_params,
fuse_qk_norm_rope=fuse_qk_norm_rope,
skip_rope=not self.is_sliding,
layer_idx=layer_idx,
dtype=config.torch_dtype,
config=model_config,
disable_deep_gemm=disable_deep_gemm,
reduce_output=enable_attn_allreduce(model_config.mapping),
)
def forward(
self,
position_ids: Optional[torch.LongTensor],
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
attention_mask: PredefinedAttentionMask = PredefinedAttentionMask.CAUSAL,
lora_params: Optional[dict] = None,
**kwargs,
) -> torch.Tensor:
return super().forward(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
attention_mask=attention_mask,
lora_params=lora_params,
attention_window_size=self.attention_window_size,
**kwargs,
)
class ExaoneMoeSparseMoEBlock(Deepseekv3MoE):
"""
ExaoneMoe Sparse MoE Block Layer.
It follows DeepSeek-V3 implementation.
"""
class ExaoneMoeDecoderLayer(DecoderLayer):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
aux_stream_dict: Dict[AuxStreamType, torch.cuda.Stream],
layer_idx: int,
):
super().__init__()
self.model_config = model_config
config = model_config.pretrained_config
self.layer_idx = layer_idx
self.mapping = model_config.mapping
mapping = self.mapping
self.enable_attention_dp = mapping.enable_attention_dp
self.mlp_tp_size = mapping.tp_size
self.is_p2p_supported = can_access_peer(mapping)
self.fusion_config = EagerFusionConfig()
# MoE fusions are disabled by default in K-EXAONE since
# they may cause a slight accuracy drop due to a numerical gap.
self.enable_fusion = os.environ.get("TRTLLM_EXAONE_EAGER_FUSION_ENABLED", "0") == "1"
self.enable_fusion &= not self.enable_attention_dp
# FIXME: incompatible with mixed quantization mode
quant_config = self._get_decoder_layer_quant_config(model_config, layer_idx)
self.is_nvfp4 = quant_config.layer_quant_mode.has_nvfp4()
assert quant_config.quant_algo is not QuantAlgo.MIXED_PRECISION, (
"MIXED_PRECISION is ambiguous"
)
self.allreduce = None
self.moe_allreduce = None
if not self.enable_attention_dp and self.mapping.tp_size > 1:
self.allreduce = AllReduce(
mapping=model_config.mapping,
strategy=model_config.allreduce_strategy,
dtype=config.torch_dtype,
)
self.moe_allreduce = MoEAllReduce(self.mapping)
has_tp = mapping.has_tp()
has_pp = mapping.has_pp()
# Submodule definitions
self.input_layernorm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
self.self_attn = ExaoneMoeAttention(model_config, layer_idx=layer_idx)
# MoE or Dense layer
self.is_moe_layer = check_is_moe(config, layer_idx)
if self.is_moe_layer:
self.fusion_config.PRE_MOE_FUSION = self.enable_fusion and has_tp
self.fusion_config.POST_MOE_FUSION = self.fusion_config.PRE_MOE_FUSION and not has_pp
self.mlp = ExaoneMoeSparseMoEBlock(
num_experts=config.num_experts,
top_k=config.num_experts_per_tok,
hidden_size=config.hidden_size,
intermediate_size=config.moe_intermediate_size,
shared_expert_intermediate_size=config.moe_intermediate_size
* config.num_shared_experts,
dtype=config.torch_dtype,
model_config=model_config,
override_quant_config=quant_config,
aux_stream_dict=aux_stream_dict,
layer_idx=layer_idx,
)
else:
block_size = 1
if quant_config.quant_algo is None and quant_config.group_size is not None:
block_size = quant_config.group_size
self.mlp_tp_size = self._compute_mlp_tp_size(config.intermediate_size, block_size)
has_mlp_tp = self.mlp_tp_size > 1
self.fusion_config.PRE_MLP_FUSION = self.enable_fusion and has_mlp_tp and self.is_nvfp4
self.fusion_config.POST_MLP_FUSION = self.enable_fusion and has_mlp_tp
self.mlp = GatedMLP(
hidden_size=config.hidden_size,
intermediate_size=config.intermediate_size,
bias=False,
dtype=config.torch_dtype,
config=model_config,
# Keep sharding consistent with computed mlp_tp_size.
# In attention-DP, mlp_tp_size==1 -> disable TP sharding here.
overridden_tp_size=self.mlp_tp_size,
layer_idx=layer_idx,
reduce_output=has_mlp_tp,
)
self.disable_attn_allreduce = (
self.fusion_config.PRE_MOE_FUSION
or self.fusion_config.PRE_MLP_FUSION
or self.mapping.tp_size == 1
or self.enable_attention_dp
)
self.post_attention_layernorm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
self.next_layer_layernorm: RMSNorm = None
def _get_decoder_layer_quant_config(
self, model_config: ModelConfig[ExaoneMoEConfig], layer_idx: int
):
"""
Some layers (e.g., the MTP layer in an nvfp4 checkpoint) are left
unquantized. Because the TRTLLM moe_backend only supports fp8/fp4
quantization, the quant_config must be overridden for such layers.
"""
quant_config = model_config.quant_config
layer_name = f"model.layers.{layer_idx}"
if quant_config.is_module_excluded_from_quantization(layer_name):
return QuantConfig(
quant_algo=None,
kv_cache_quant_algo=quant_config.kv_cache_quant_algo,
)
else:
return model_config.quant_config
def _compute_mlp_tp_size(self, intermediate_size: int, block_size: int) -> int:
"""Adopted from DeepseekV3DecoderLayer._compute_mlp_tp_size."""
assert intermediate_size % block_size == 0, (
f"intermediate_size {intermediate_size} must be divisible by block_size {block_size}."
)
if self.enable_attention_dp:
# If using attention DP, the MLP also uses DP instead of TP.
mlp_tp_size = 1
else:
# The two math.gcd operations ensure that mlp_tp_size falls in the candidate TP sizes.
tp = math.gcd(
intermediate_size // block_size,
self.mapping.tp_size,
)
if tp > self.mapping.gpus_per_node:
mlp_tp_size = math.gcd(
tp,
self.mapping.gpus_per_node,
) # Avoid costly inter-node TP
else:
mlp_tp_size = tp
return mlp_tp_size
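# Hedged worked example (hypothetical sizes): with intermediate_size=28672,
# block_size=128, tp_size=16, and gpus_per_node=8:
#   tp = gcd(28672 // 128, 16) = gcd(224, 16) = 16
#   16 > 8, so mlp_tp_size = gcd(16, 8) = 8, keeping MLP TP within one node.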
def forward(
self,
position_ids: torch.LongTensor,
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
residual: Optional[torch.Tensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
# LN has already been applied in the previous layer, except for the first layer.
if residual is None:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
hidden_states = self.self_attn(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
all_reduce_params=AllReduceParams(enable_allreduce=not (self.disable_attn_allreduce)),
**kwargs,
)
if self.is_moe_layer:
hidden_states, residual = self.forward_moe(
hidden_states=hidden_states,
attn_metadata=attn_metadata,
residual=residual,
)
else:
hidden_states, residual = self.forward_mlp(
hidden_states=hidden_states,
residual=residual,
)
return hidden_states, residual
def forward_moe(
self,
hidden_states: torch.Tensor,
attn_metadata: AttentionMetadata,
residual: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
def _run_moe(hidden_states, hidden_states_fp4, do_finalize):
return self.mlp(
hidden_states,
hidden_states_fp4,
all_rank_num_tokens=attn_metadata.all_rank_num_tokens,
final_all_reduce_params=AllReduceParams(
enable_allreduce=not (
self.fusion_config.POST_MOE_FUSION or self.mapping.tp_size == 1
)
),
do_finalize=do_finalize,
)
if self.fusion_config.PRE_MOE_FUSION:
# moe_backend can be either CUTLASS or TRTLLM here
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.post_attention_layernorm.weight,
eps=self.post_attention_layernorm.variance_epsilon,
trigger_completion_at_end=False,
),
)
else:
# No fusion
hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
# Note: this fusion pattern is only supported for single-node TRTLLM-nvfp4 backend now
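# do_finalize=False defers the MoE finalize step so it can be fused into the
# MoEAllReduce below; every condition in the conjunction must hold for that
# deferred path to be taken.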
do_finalize = self.mapping.is_multi_node() or (
not (
self.fusion_config.POST_MOE_FUSION
and hidden_states.shape[0] <= self.moe_allreduce.max_token
and self.model_config.moe_backend == "TRTLLM"
and self.mlp.experts.has_nvfp4
and self.is_p2p_supported
)
)
hidden_states = _run_moe(hidden_states, hidden_states_fp4=None, do_finalize=do_finalize)
if self.fusion_config.POST_MOE_FUSION:
if do_finalize:
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
trigger_completion_at_end=False,
),
)
else:
assert len(hidden_states) == 4, "hidden_states must have 4 elements"
shared_output = hidden_states[0]
fc2_output = hidden_states[1]
expert_scale_factor = hidden_states[2]
expanded_idx_to_permuted_idx = hidden_states[3]
moe_all_reduce_params = MoEAllReduceParams(
expanded_idx_to_permuted_idx=expanded_idx_to_permuted_idx,
expert_scale_factor=expert_scale_factor,
shared_expert_output=shared_output,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
is_cutlass_min_latency=False,
)
hidden_states, residual = self.moe_allreduce(
fc2_output, all_reduce_params=moe_all_reduce_params
)
elif self.next_layer_layernorm is not None:
hidden_states, residual = self.next_layer_layernorm(hidden_states, residual)
return hidden_states, residual
def forward_mlp(
self,
hidden_states: torch.Tensor,
residual: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
if self.fusion_config.PRE_MLP_FUSION:
act_fp4, act_sf, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4,
residual=residual,
norm_weight=self.post_attention_layernorm.weight,
scale=self.mlp.gate_up_proj.input_scale,
eps=self.post_attention_layernorm.variance_epsilon,
),
)
hidden_states = Fp4QuantizedTensor(act_fp4, act_sf)
else:
hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
hidden_states = self.mlp(
hidden_states,
final_all_reduce_params=AllReduceParams(
enable_allreduce=not (self.fusion_config.POST_MLP_FUSION or self.mlp_tp_size == 1)
),
)
if self.fusion_config.POST_MLP_FUSION:
hidden_states, residual = self.allreduce(
hidden_states,
all_reduce_params=AllReduceParams(
fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,
residual=residual,
norm_weight=self.next_layer_layernorm.weight,
eps=self.next_layer_layernorm.variance_epsilon,
),
)
elif self.next_layer_layernorm is not None:
hidden_states, residual = self.next_layer_layernorm(hidden_states, residual)
return hidden_states, residual
class ExaoneMoeModel(DecoderModel):
def __init__(self, model_config: ModelConfig[ExaoneMoEConfig]):
super().__init__(model_config)
config = self.model_config.pretrained_config
self.num_hidden_layers = config.num_hidden_layers
self.embed_tokens = Embedding(
config.vocab_size,
config.hidden_size,
dtype=config.torch_dtype,
mapping=model_config.mapping,
tensor_parallel_mode=TensorParallelMode.COLUMN,
gather_output=True,
)
aux_stream_list = [torch.cuda.Stream() for _ in range(3)]
self.aux_stream_dict = {
AuxStreamType.Attention: aux_stream_list[0],
AuxStreamType.MoeShared: aux_stream_list[0],
AuxStreamType.MoeChunkingOverlap: aux_stream_list[1],
AuxStreamType.MoeBalancer: aux_stream_list[2],
}
self.layers = nn.ModuleList(
[
ExaoneMoeDecoderLayer(
model_config=model_config,
aux_stream_dict=self.aux_stream_dict,
layer_idx=layer_idx,
)
for layer_idx in range(self.num_hidden_layers)
]
)
self.norm = RMSNorm(
hidden_size=config.hidden_size, eps=config.rms_norm_eps, dtype=config.torch_dtype
)
def forward(
self,
attn_metadata: AttentionMetadata,
input_ids: Optional[torch.LongTensor] = None,
position_ids: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
lora_params=None,
**kwargs,
) -> torch.Tensor | Tuple[torch.Tensor, Optional[torch.Tensor]]:
if (input_ids is None) ^ (inputs_embeds is not None):
raise ValueError(
"You cannot specify both input_ids and inputs_embeds at "
"the same time, and must specify either one."
)
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids)
hidden_states = inputs_embeds.to(self.dtype)
residual = None
for decoder_layer in self.layers[: self.num_hidden_layers]:
hidden_states, residual = decoder_layer(
position_ids=position_ids,
hidden_states=hidden_states,
attn_metadata=attn_metadata,
residual=residual,
lora_params=lora_params,
)
# The final LN has already been applied in the last decoder layer as part of the fusion.
return hidden_states
@register_auto_model("ExaoneMoEForCausalLM")
class ExaoneMoeForCausalLM(DecoderModelForCausalLM[ExaoneMoeModel, ExaoneMoEConfig]):
def __init__(
self,
model_config: ModelConfig[ExaoneMoEConfig],
):
super().__init__(
ExaoneMoeModel(model_config),
config=model_config,
hidden_size=model_config.pretrained_config.hidden_size,
vocab_size=model_config.pretrained_config.vocab_size,
)
def load_weights(
self,
weights: Dict,
weight_mapper: Optional["BaseWeightMapper"] = None, # noqa: F821
skip_modules: Optional[List[str]] = None,
allow_partial_loading: bool = False,
):
# MoE naming pattern.
moe_weight_patterns = {
"gate_proj": "w1",
"up_proj": "w3",
"down_proj": "w2",
}
module_names = list(weights)
for name in module_names:
if "mlp.e_score_correction_bias" in name:
# Move bias into the gate module.
new_name = name.replace(
"mlp.e_score_correction_bias", "mlp.gate.e_score_correction_bias"
)
else:
# MoE Weight Remapping.
new_name = name
for k, v in moe_weight_patterns.items():
pattern = rf"(experts\.\d+\.){k}\b"
new_name = re.sub(pattern, rf"\1{v}", new_name)
# Remap the name-parameter pair if needed.
if new_name != name:
weights[new_name] = weights.pop(name)
super().load_weights(
weights=weights,
weight_mapper=weight_mapper,
skip_modules=skip_modules or [],
allow_partial_loading=allow_partial_loading,
)
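# Hedged illustration of the MoE weight remapping above (hypothetical weight name):
#   re.sub(r"(experts\.\d+\.)gate_proj\b", r"\1w1",
#          "model.layers.3.mlp.experts.7.gate_proj.weight")
#   -> "model.layers.3.mlp.experts.7.w1.weight"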
def post_load_weights(self):
# For the cross-layer residual+LN fusion.
for idx, layer in enumerate(self.model.layers[: self.config.num_hidden_layers]):
if idx == self.config.num_hidden_layers - 1:
layer.next_layer_layernorm = self.model.norm
else:
layer.next_layer_layernorm = self.model.layers[idx + 1].input_layernorm
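To make the wiring above concrete, here is a minimal standalone sketch (toy modules, not the TRT-LLM implementation) of the next_layer_layernorm pattern: each layer applies the *next* layer's input LayerNorm together with its residual add, and the last layer applies the final model norm, so no layer needs its own pre-LN after the first.

import torch
from torch import nn

class ToyLayer(nn.Module):
    def __init__(self, hidden: int):
        super().__init__()
        self.input_layernorm = nn.LayerNorm(hidden)
        self.mlp = nn.Linear(hidden, hidden)
        self.next_layer_layernorm = None  # wired after weight loading

    def forward(self, hidden_states, residual):
        hidden_states = self.mlp(hidden_states)
        residual = residual + hidden_states  # residual add ...
        hidden_states = self.next_layer_layernorm(residual)  # ... fused with the next layer's LN
        return hidden_states, residual

hidden = 16
layers = [ToyLayer(hidden) for _ in range(3)]
final_norm = nn.LayerNorm(hidden)
for idx, layer in enumerate(layers):  # mirrors post_load_weights
    layer.next_layer_layernorm = (
        final_norm if idx == len(layers) - 1 else layers[idx + 1].input_layernorm
    )

x = torch.randn(2, hidden)
out, res = layers[0].input_layernorm(x), x  # only the first layer applies its own LN
for layer in layers:
    out, res = layer(out, res)
print(out.shape)  # torch.Size([2, 16])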

Some files were not shown because too many files have changed in this diff.