TensorRT-LLMs/cpp/kernels/xqa/test/warmup.cu
qsang-nv 07edac2818
[None][feat] Add vLLM KV Pool support for XQA mla kernel (#8560)
Signed-off-by: Qidi Sang <200703406+qsang-nv@users.noreply.github.com>
2025-10-22 14:12:57 +08:00

27 lines
748 B
Plaintext

#include "../utils.h"
#include <cmath>
#include <cstdint>
#include <cuda_runtime.h>
/// Busy-wait kernel used to keep the GPU occupied for a fixed number of clock ticks.
///
/// Every launched thread records the clock64() counter on entry and spins until
/// at least `cycles` ticks have elapsed. The kernel reads and writes no memory.
///
/// \param cycles Number of clock64() ticks each thread should spin for; 0 returns immediately.
__global__ void kernel_warmup(uint64_t cycles)
{
    uint64_t const tic = clock64();
    // Compare the elapsed tick count against the budget. The previous form,
    // `tic + cycles < clock64()`, was false on the very first evaluation for
    // any positive budget, so the kernel returned without spinning at all.
    // Subtracting in unsigned arithmetic also avoids overflow of `tic + cycles`.
    while (static_cast<uint64_t>(clock64()) - tic < cycles)
    {
    }
}
/// Enqueues kernel_warmup on `stream` with a cycle budget corresponding to `ms` milliseconds.
///
/// The cycle count is clockRate (kHz) * ms. When built against CUDA 13.0 or newer the
/// clock rate is queried from the current device via cudaDeviceGetAttribute; otherwise
/// it is taken from `prop.clockRate`. The function only launches the kernel and checks
/// the launch status; it does not synchronize the stream.
///
/// \param prop   Device properties; only `clockRate` is read, and only on pre-13.0 builds.
/// \param ms     Requested spin time in milliseconds. Non-positive or NaN values yield a
///               zero-cycle launch instead of an undefined float-to-unsigned conversion.
/// \param stream Stream to launch the kernel on (defaults to the null stream).
void warmup(cudaDeviceProp const& prop, float ms, cudaStream_t stream = nullptr)
{
#if CUDA_VERSION >= 13000
    // `prop` is intentionally unused on this path; the attribute query replaces prop.clockRate.
    static_cast<void>(prop);
    int device = 0;
    checkCuda(cudaGetDevice(&device));
    int clockRateKHz = 0;
    checkCuda(cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, device));
#else
    int const clockRateKHz = prop.clockRate;
#endif
    // clockRate is in kHz, i.e. cycles per millisecond, so kHz * ms = cycles.
    // Compute in double so the product is not limited to float's 24-bit mantissa.
    double const cyclesExact = std::round(static_cast<double>(clockRateKHz) * static_cast<double>(ms));
    // Converting a negative or NaN floating-point value to an unsigned integer is undefined;
    // the comparison is false for NaN, so both cases collapse to zero cycles.
    uint64_t const nbCycles = cyclesExact > 0.0 ? static_cast<uint64_t>(cyclesExact) : uint64_t{0};
    kernel_warmup<<<16, 128, 0, stream>>>(nbCycles);
    checkCuda(cudaGetLastError());
}