| .. |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk32HV32LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomMultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskCustomVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutContiguousKvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskSlidingWindowCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128PersistentSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP128VarSeqLenTileSizeQ128TileSizeKv128StaticSpecDecodingGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingWindowCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutContiguousKvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP128VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseMultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutContiguousKvMaskDenseVarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP128VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Update TensorRT-LLM (#2936)
|
2025-03-18 21:25:19 +08:00 |
|
kernelMetaInfo.h
|
fix: [MLA] fix the bug with fp8 MLA kernels on Blackwell. (#3008)
|
2025-03-25 18:03:29 +08:00 |