| .. |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QBfloat16KvBfloat16AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE2m1AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OBfloat16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE2m1HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OE4m3HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QE4m3KvE4m3AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk64HV64LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskDenseVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPackedQkvMaskSlidingOrChunkedCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP32VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64MultiCtasKvModeVarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskCustomP64VarSeqLenTileSizeQ128TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk128HV128LayoutPagedKvMaskSlidingOrChunkedCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPackedQkvMaskCausalVarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP32VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128PersistentContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk192HV128LayoutPagedKvMaskCausalP64VarSeqLenTileSizeQ128TileSizeKv128StaticContext_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta128LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Persistent2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128Static2CtaKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta256LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP32VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeCgaReductionVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64MultiCtasKvModeVarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenReuseSmemKForVTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ8TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv64StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128PersistentSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ16TileSizeKv128StaticSwapsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128PersistentKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
FmhaSm100Kernel_QFp16KvFp16AccFp32OFp16HQk576HV512HVPerCta512LayoutPagedKvMaskDenseP64VarSeqLenTileSizeQ64TileSizeKv128StaticKeepsMmaAbForGeneration_cubin.cpp
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |
|
kernelMetaInfo.h
|
Feat: add sliding-window-attention generation-phase kernels on Blackwell (#4564)
|
2025-05-26 09:06:33 +08:00 |