From 5bd50d477e1a8154cc28df21d8237ada1c7ad05c Mon Sep 17 00:00:00 2001 From: Xiwen Yu Date: Tue, 2 Sep 2025 19:26:24 -0700 Subject: [PATCH] update mha cubins and support 103a Signed-off-by: Xiwen Yu --- 3rdparty/cutlass | 2 +- cpp/cmake/modules/cuda_configuration.cmake | 9 +- .../kernels/multiHeadAttentionCommon.h | 1 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...Kv128Persistent2CtaKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...Kv128Persistent2CtaKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...Kv128Persistent2CtaKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...Kv128Persistent2CtaKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...qQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...2VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...aVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...vVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ16Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...qQ64Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...SeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp | 3 + ...4VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...enseVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...usalVarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...Q128Kv128PersistentKeepsAbForGen_cubin.cpp | 3 + ...rSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...eP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP32VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ128Kv128PersistentContext_cubin.cpp | 3 + ...lP64VarSeqQ128Kv128StaticContext_cubin.cpp | 3 + ...qQ16Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...arSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp | 3 + ...eqQ8Kv128PersistentSwapsAbForGen_cubin.cpp | 3 + ...VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp | 3 + .../fmha/cubin/kernelMetaInfo.h | 8138 +++++++++-------- .../trtllmGenKernels/fmha/fmhaKernels.h | 19 +- .../trtllmGenKernels/fmha/fmhaRunner.cpp | 4 - requirements.txt | 2 +- .../test_lists/test-db/l0_dgx_b300.yml | 59 + .../test-db/l0_gb300_multi_gpus.yml | 42 + 1453 files changed, 8789 insertions(+), 3819 deletions(-) create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp create mode 100644 cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp diff --git a/3rdparty/cutlass b/3rdparty/cutlass index b2dd65dc86..a49a78ffef 160000 --- a/3rdparty/cutlass +++ b/3rdparty/cutlass @@ -1 +1 @@ -Subproject commit b2dd65dc864e09688245b316ac46c4a6cd07e15c +Subproject commit a49a78ffefc86a87160dfe0ccc3a3a2d1622c918 diff --git a/cpp/cmake/modules/cuda_configuration.cmake b/cpp/cmake/modules/cuda_configuration.cmake index b9d910fdee..db5da210c4 100644 --- a/cpp/cmake/modules/cuda_configuration.cmake +++ b/cpp/cmake/modules/cuda_configuration.cmake @@ -138,6 +138,9 @@ function(setup_cuda_architectures) message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}") endif() endforeach() + if("103" IN_LIST CMAKE_CUDA_ARCHITECTURES_CLEAN) + list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN "100") + endif() list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_CLEAN) set(CMAKE_CUDA_ARCHITECTURES_RAW ${CMAKE_CUDA_ARCHITECTURES_CLEAN}) endif() @@ -177,12 +180,6 @@ function(setup_cuda_architectures) message(STATUS "Excluding SM ${CUDA_ARCH}") endif() endforeach() - # deal with SM100/f - if(NOT "100" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG - AND NOT "100f" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG) - add_definitions("-DEXCLUDE_SM_100") - message(STATUS "Excluding SM 100(f)") - endif() # -a suffix supported from Hopper (90) set(MIN_ARCHITECTURE_HAS_ACCEL 90) diff --git a/cpp/tensorrt_llm/kernels/multiHeadAttentionCommon.h b/cpp/tensorrt_llm/kernels/multiHeadAttentionCommon.h index 03cfba7c47..a3363388f3 100644 --- a/cpp/tensorrt_llm/kernels/multiHeadAttentionCommon.h +++ b/cpp/tensorrt_llm/kernels/multiHeadAttentionCommon.h @@ -111,6 +111,7 @@ constexpr int32_t kSM_86 = 86; constexpr int32_t kSM_89 = 89; constexpr int32_t kSM_90 = 90; constexpr int32_t kSM_100 = 100; +constexpr int32_t kSM_100f = 10100; constexpr int32_t kSM_103 = 103; constexpr int32_t kSM_120 = 120; constexpr int32_t kSM_121 = 121; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a0230169f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b008157f7d6bf0bce51ad66f2924e9a5e6b190254750b85f53a87b1931008697 +size 672535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b6ee66ec49 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71f661b50c3f2f615a3babe192f924aef33eb315a794e36403a21a92923158f +size 634171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e21b488bde --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:349c722e1c175598715fcf9f7ff40ffda9b85fc4f02699adb038aa94023e4f7c +size 651569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ce53e0b461 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c040724d28bac1cf5034ec987ead0abc3eca29621bfdb02362f6c10c14e81aa5 +size 618385 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..01f14f3fc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2852fa0332933b3d26345ddcbff1abc18cf3111bdc193984d5aa289471df4375 +size 676855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0be62d76f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deee451bca014cfcad6cf4aa15a88dfe7d8bf14e00d9918e5d1fea9b571b348b +size 630307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..47d0cf6d98 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727b8cdc921684e204ad18359350f3478b66fe090a937a0f03d0bdbb3e124511 +size 678699 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..242c50e3ff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e25de8526e4f8e3ece86814283147edc8246fe2a3f27cae7975d2dec83276d5 +size 639255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9b049ca05f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a1400227f3ee2b7366eaa5b9bf21b6bd03c143ecaa74633cfd3076762e8baf +size 717131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..82c48ed6b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced57950db96d94c81a011440b45fc0e3994f47ce1027b274e52feac0f0e0a9b +size 634267 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4699431d43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14022d08df8e55c360e796644f0fba8499508c58cf3cc037d413c19870ab9945 +size 654291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..53233750c7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a01513e85095de63486a9602e914656c0cd0ebaad5cdd7d928d5d6898e1fc +size 583118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bb44d3200b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93d4baf4029784d519cc0cbf165cbb0147b520247dae0d507196fdf1ec6e4d9 +size 621999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7f0a639066 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6c4e5122da147bf17f1c976816142ec03b2d9870ab06d7c3513258c9b6e993 +size 550062 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..10df6a1f46 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8ef4599d4c5152fb7dabdd5ad34d511f833f44ef418335b480870432c7ec96 +size 657911 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..013a990922 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8973e4673d164413b9041362b25d88d57810c24b4981deccee95acef85c9e35e +size 611362 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..41fb981e51 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb708698b0483f01e81de87aa456018241f2226b2b8ac0dde5f42a279df8b5e6 +size 659755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8ac7385e68 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26706c59e9be4ef51d6f27ac01b5bbfcc5edccc9362f8b5a45130362ab58e177 +size 620311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6b1353c04d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78bfa03235f26071042c3fb4faae960d5a5e5f3183d334594bb2d1a2f302695 +size 696165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3661095997 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a90d1ee0b5b3ab387065795ae837f5619149999a2483d5c4dfa3759ff1c32c1 +size 617690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a4b9c7d0f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b553dc790caab29ae9d9f80f46c1b0e6270f1376fd9b94b5e002e959c7731dc8 +size 632189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..89c2df6756 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e55c89df2441bc2a35df4789be5980b03539a925ab2485a0c4c88b0a19a067 +size 564174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8d3279ef68 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56ae77954f33c37de1f21e2c6ab47b855df8e602f56f5c2fcc86a8cee1b217e6 +size 600686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a6d3f3a329 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609d577a5eb29698b48b4953ac5103051f5893595a25c3fd654ee74182e55360 +size 531118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a2dbe1e1b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8258856933f92d4a34021d0b37d871311e8ad7e53119601d24133ba4f983967e +size 680417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d6a2553c33 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c94ac43674f548280cdaf2a94c32d53d1b38023363fa4dc523d5fe211c6323b +size 630785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..76fc83e467 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923396628ebc72517067aca3e9bafa53e6276459afe43860d13e68c0b0bc51d9 +size 681471 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e46d087eff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fdf83cef2002b82171d44c07f6afffec05d181f7aa41f0e714e47082a48544e +size 638945 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3b0a8a23b8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab41b98e59fb1534ca32ecbe40f660eea103ffe1e3690fc3f8d3eb7f9ffbeab3 +size 727131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8de01566d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6494cbe561c1734b4ef40be238b64daa558003fd54bafa51de9658ad3d7e7b +size 693921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bee632a13d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c9978a03d994080c88365f570d72435f4da32177347144f93ee0de73d90b5b +size 638439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c8646eee10 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04df4666457acee8a7ffb8e8d39bd7a0b61ef3479884544660d010353a45c80d +size 575654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f00e64ff3d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7872a32915798ebeb8090ccd12b194d20f56a5fdceb3fd314174f4e9b58f0acc +size 606172 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b43b46eacc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad9b6c9ae5e7ce08dcb041ae546e0ea8cbacaa0135763f38c32ab465338a89b +size 542598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..abe73c3a74 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e2fa67ab200295e105340f068ac59053dd097fd7800fc1f7b192c318bfa458 +size 661473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fb6634d362 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8805f5c5d401cf5f1b1e9cf44805fca82e8e70c3f25e9e8d0d1659ac9c489773 +size 611840 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..20ae25dac1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59450c1ee22418af646747820a1ced88bbe3b3fe6f05b9be585b57dc79ad69d3 +size 662527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..52fd38d845 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b098063a4187cb8988877800f2bffaa245e185eb8bcf678e2029da2052283d +size 620001 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ba8c1d0e4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91764c5d985686105e55ac4112d4d34b62cf366960a30409246ca24548bbf9e +size 706163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..932504697a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdace05675a733fbb7a5f5a39e537751f570d9e435197399157889efa71d806e +size 678135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a7c1aa9e1c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2b504e7484bf76fe5deee8d3f6de60bd366849cba82147c1d331f71e704bb5 +size 617126 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..79a1785286 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed81023905f417fb69fbc8773554a8bd367fd88644ae8f7c305030b553a8dd93 +size 556710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9e877e01b4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6051bb1b62085f078270ad6c9226dd3718873484f4b83edc3cc21d4c4b42d250 +size 584860 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b8bfd6964a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5bb94ceb562a6ca23510bc5573f1ce593c577ef3a47353b21d24ee372c929c +size 524444 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..56219240c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3257c7aacfa8ff6de91e816fa15708c1b6f4c84fba1c9172eb0d2eb9ed9c5c7 +size 600408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7e473dc9b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27ce0c4c0eb1ae774e1efdc1e711157eaef1df64831d4a52c2a4c6aacff7b6f +size 565940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4db7cc3e92 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6da1e53115103bba1e67dde7b0986b78d4901302bf9fd2939d2eb7cf5f61da +size 580354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..91cfb3e098 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d3fad691e48f003b933fc3bd493ab4c64084e6c0653d1f4a5a84c96c5e1160 +size 550154 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6c653676f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d680ec28eacb402985d353fd1f340922309839ddae097c62ef22ba3d802ec6 +size 574462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..42e8d9a368 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40bbcce84e204bc33b371bc499138230e1fba8c3be53b1b75b5e1928448d967b +size 551692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..64609675b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb883b27913a3fd3edc671de641ecea6b27fb29b73046b0b967a398a39c82be +size 597592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6a1bb58151 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a5514b55abec8215ad7352a0f6dcbcddea09e959c79cc477eccba7ed04946f +size 569298 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..026e0aae48 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16447a2a543d46f41106b8732204f71c8f623612cd170baa18c3c9ad53e298a0 +size 645793 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..35abb7bbb1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5df6c276ff30cfc907d874dbaad52c3dfe2ef3c070360dd479bf451f8e080ed +size 566036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1cde5fef03 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbcdf0233e1e90d568e6a65431a2f0518b9a80c6d11104504a7c57f62c872ea +size 590548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6b9b9af2f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ab7e6ed0f3776fbbfc525109d3b46b5ea5c1b61a2c20b9c23ff38c1e25ccfa +size 503664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..85a1ba23a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf3e258ae4ec4fe9fcfe6cda40049b602efee7c2e28ae713cb93c0a0a6cc4d8 +size 557494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..040f42a5a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:548304a8e28396ebe912b24b492397d250b48ffcb87b3aa994db887c1489d1b0 +size 473766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f20f0bbc78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40600f5b624b0ca65511b9497f73447ba379eded5b76645b12bcd7ae31ce6aca +size 557886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..877ee5c05d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a399c3304117d755906c4729c9fa7ad5febfccd08c3859a667a60fe751ce12a8 +size 534326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8706e1ddb9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1819bfc27bb26deaa75e96f89908e8909ab5496203e90fa47639ef9052cce02a +size 578648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..56d5149a07 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e09e5cdac9337bef2026bcd65c076454bcf5e24df52b4057604ebe606003de1 +size 549564 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dee71e7bc1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd12e2534fdbd6b8c547b9d5a2470001f32f2ad5479764ef623d1a840dacc6a +size 625739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9d57935939 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8799b2fc6db3aef6cadc81bc808490bfd3b717dd07e98c774c92bbbe5e3e4f8d +size 549460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0e3a563341 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09419f8222def86bcf5231af3c1c7b2dc31cabc6b6a781dedc7fe0ac13db1dec +size 569236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e7e78027de --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e8f50048cd16b02f51359f3d4c709156421adcd27734cae493d2861c8681c4 +size 487088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b37b51554a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05cf675571b991a85bd0fc5947d13b6be85c6fc8a06b322023037f3e17421dad +size 535392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1617c0abc9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4269a6c9836193305e5ed18c6935efa6c3881e59307a6788effc67ff0e7d947 +size 457190 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..29d6a7ccef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0924a29b5dca879d8ac8cfb3ec7fff5851cb2df33f84fd2c0a2f7e58365c9d40 +size 591442 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a578f51dd4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edafadadc70d17ee6490907f37c7a9f61fa9f0235531ef49995a6e2797c4e955 +size 562332 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f30f34828d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6e4e37593e07ed92a1142cd72614c818a4722fb932e12a931e1b8ecd8835dd +size 599600 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5ee4e608ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dae16ee22788196ca4951fc123aa47e1cfe2390441e94dd83a8b4aff076b567 +size 570516 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..660996b6c1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b8f5eb24e7b546ecd2f7ae7f2f85cae6e76a6d73561323323c2c4d54275db7 +size 655003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a459764a12 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b9bda1e9f70b502cf47bc2b231b6116f564743f4d1749243c145edc6d3b082 +size 619747 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..faabeab5a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc6ad90456491137c3934f25a1da7c1197fc43f711796be2dd25f55d6121428 +size 575512 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ae9b7873e6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447d21284a61438c9349a628ab346386f37fece8f9f4017517de18e6d3ab3ecd +size 506412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d6f70e5020 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c841e7f8b6e8e905a599c7a83e5cbd6b03f1b4a7a0d7bb289988a469b9623f +size 540088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..745b9ae4ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c841275d836683c14cfaba4e0e4f5f9204141dafb154f534fd6bf18161cd7216 +size 474936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..004f90042e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd309ac9206333a6058cc2ae860647c5dafbaa8f068f67670ccd79863db77b2 +size 572498 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b9dd044a3b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d13dac9c137f97f0e90ad34b00e7ec793d3a66ba1a55f5f9fc3a640f2d228c +size 543388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..90689b0d4d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2d8d80a049dfd9d30fad88dfbb5422556960582cb8c24b0b0b8273254400ab +size 580656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7284608f39 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd00c98c4c87d3afdfdadd301f8261d3aff270aa0fbf8216f4aa7fa43bfe7172 +size 551572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bceb9f7f5b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7301c79468d8de004f426bb9cdefb7302b8cb77e193fddb996e648adde219d14 +size 634159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c43dd45b70 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09062978682d7add0972aff436486b6b4c82e5c1b0be271d4b1c8f0f8a650b0 +size 603170 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4c6e6a6d46 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf43248972dcda4fe0334fa016a467e4566350b975fd498343f4535d5a6a278 +size 554200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a55c5c05d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76cbff768d6c2aff34f15f20035e3d0a9340ed58899efe112b110e5d209b349 +size 487468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3751d56327 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8afe52b247e9c27691025f10fbbca66f3b195a967944a3128b785f2473fb85 +size 518776 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c4f4d37152 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f524ae3790ab0f8ac026d2c13a8edc9566ab5baa1849d4d5cfa654db91bc2a24 +size 455992 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..502cd6b7c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554e341207184f70b89461fd7dfaf5e4faddd05b01201cad62422c28071641e6 +size 802289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0c75f59e78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990a64e7dd2326714eccb6af5e9adf8e1e8d64f510961afa127f91a748cf81cd +size 715157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7652cedb16 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f8a52ff9d0f6c5ffc98c3a7f21ed2aad30f9d76fe6cac6151783b246af326e +size 804211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1c2e68c124 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a8eb61d079eb62c67fc557ac06646e6cd9031d90f42a6b36c6645be062ec453 +size 715007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3bbf50447a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a153012c06a150bd18fe953afd1b98707c1e666ba39dca77040874b8afac9ff2 +size 869513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e63574827a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07cec8e71786afa295285c2ad2a5cd752fe308bbc1f0a00d22a74bdec8218a3 +size 782579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d0ab0b4add --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f662c6d889f8588f8952fb1b4b037654e9a2cf42079a922bad1cc43bacd8ae +size 859023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e1f5ca3b7d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4d111e756b1ec729e605fe2eb96ff5c5d3dcf24ea15b1734490d37f05884f8 +size 815947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..50bbbd43c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f855a7f0acdeb6744d8b3bb744741884b8bddb6fcdaf4ab4bffdfb1b574b1c42 +size 837663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..19574279df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659cc9111e935d6f4b4217787f194a02e79f00482324f4c7f8349a167df08eec +size 794833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..64126b3a08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae815fc59f9e17c6e41d38d5018cfe6b859eebf43e6a3fe164e383ecba355da9 +size 933055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6fba58caa9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:588538d1ce8ecb783720121f58f4f948c69221588e093b6ef50e7a1db8b8263d +size 912823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4cb2aaf652 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c9a19a7a9c1e861f2f39c8fee559cbecda3f76e834be756edee900d3760a55 +size 892039 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..48e606778d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f5c01927f2d0f4adaa1881f04a26dd2bf3b612064043aa8a6a4ab5c0690a66 +size 769439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7e2f0bc7d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777161f3c67f29aa49485b6be211c5a10f97566f7f46ff93d4638420496ef344 +size 908537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..617ec0ac63 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9beb011499242875de95531ad9b047c61e13354ad077b3bd55cd7baaa3b626db +size 891709 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c5dd2ff53f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75931f3b0e306643f9ae69672b84728ea443be4c63c4d7489397f938155c2f03 +size 871271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..82e62fa83a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a49a4ed6482ab0f969aaf2e0ea43d8473382aed290492c714ef11a8494659e8 +size 747731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e2c85eb49e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae9c818aac1bf8c77c649a4676501706cb1c10e2d34541ec27b31ca3bdbce5c +size 760483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..21b4e376fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931d228641b9b7674dd352df56c71f86fd5e6869ce10c34444aad68f5815adc3 +size 693585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b8126744cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba9c54ca3504abca634994ec752a67020bb114a69ad17645ed35ad9defcb8c3 +size 759293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d7d1ce80bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca33590c2731efcbc108328616682fbba904cbd116a931cc3d16adca0d2892f +size 712623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..87c97c4b86 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5340a332e09c603336cd2ad3a0b8eb0a8e930870c9ad76b6b8ffb5bbd3985ba +size 933071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..dba9076aff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62f3100fe840f53d2aa2e60a5b4d1213f7f2314839a739b26da6241a42a8eb0 +size 816537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..21cc1da7a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1793fe59e045222a77c862e6d268d1e700c242f3463cb58c78ca465addb0780 +size 772403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8686b1ec7b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc399bed997d6d7fbdb881532f765eb04b1efb0f9219f9e36694baa6abfabf2 +size 645459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b3a22214a3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3977c1b7c16a93475c5e8745ddaeabeabfd2670b79077127617c9f0ab24c3784 +size 732539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6bb330e2ee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b1a7074a075068568e5ae0f252bd0d36fdb7bfa9bd2b370b15964146fb90563 +size 608654 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..39ff86fc29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772a0664a2cac4606e044601baa8d117059e421fbddbc03eef48eed5a2a74eb8 +size 733943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7e4eeb7f8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89be25748ce9941526d5e8f7595b1915c0cabb095721dcc8bf6f62d07cea328e +size 669265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f715b7104b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecd4d4dc435d7d844a2cffbe1ed65e50706a9dc4a27a1e916c7c79eebdc31ba +size 734281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a96ad0a06b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ade8cc9f092cf9472d1362800f2c072ce51f132042a80811f0cd14f9fd7d1e +size 688399 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..179eb1e8d0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c4f6640c07c9dcbf859bdc7fc9ce717a6ec1a6eb1086ac7ccd6444980cbd18 +size 907861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ea495f9f20 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a507340e5998f26eb6ce0302e780f0c1548eb25d7f32dbd8fc5a6cc1ff5f4109 +size 796213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ef8c3d80ab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e75851f1ba4dd2bd0c83b232c5ef3e8670171e4e72080194e21e66c6c6a5121 +size 744923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..97028b9151 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b1335ee8926922f4f9b67cf0b4ee1553819c4e58da560fd7700ba58c3f86a8 +size 622963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9a62620e9d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b761c6e7e50b61c1c4d067f971314a87cede73650cc8b8c063c2e7671d7b9e88 +size 705011 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8e2ae1dd8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7efb855ba5181493e5810b9e077249881eea4d13cb81ef45b54c6a6b2ef935 +size 586948 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b3ce048f05 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b644f96f7fd6f458dc6d10c45d8c1719e85e16d92767b5b1fadcdd6572e243a9 +size 762837 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d20cad86db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c5ab41a89797e5d8992f47a8cb148549380a9c4502ac1f40a18c5584ec82b6 +size 709851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7f5d9f74ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe17462e3ac9c27e148dac56a1257513ddd9f026fbd88cc6fa1b40c528e4c6c +size 760511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed47951b9e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88522c9746d09e89ce66645cb0eff406932b1373833b45118953b6df2ed1f632 +size 713741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c7a5d8302a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d2e999c0f80646681db9388fe16f18bf843a99c2b73c34e9574dfd00d468401 +size 930145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0d04198568 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd70f265533bd1eec6b91f8a8d29e0b32e4a1317d0cd8c8e63762b5ac2db8677 +size 885589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..172cc2d7b4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ccad0c1217a5d16dcca178720506c5159552de3d6f08f3db0a18773334418a +size 744859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eaaf657ea1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102f0fc974710dae19108257f31a8922a0973f3893057e62482a1cbcbc77d8a5 +size 647121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..76bb64d854 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea77270051cb3f674ced2b595428eabb2e590896ba90606d807dc69ea8c3418 +size 704207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..48a1ce7de7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7721184818251e6e8bdeb4e0a4f6cbd91cbce37f3c863ad7bbeb403cf3b6b678 +size 608490 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3cc4683973 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382dd08215219a5afdc2589a21db0e75287ec41c40ebf9d2e8b9a87736979364 +size 736739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d75db48690 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a49f7b0b401ab500fc92ddacf4d5cac6c546a0e46398068dad610931182d4b +size 683753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6034b25426 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1962568383bd5be814e46b3354a413ae636173f6cb2d1486d2d1a29e09cb6b6 +size 737079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b2b21504b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e7730b92025ffda8f04c3b732193abb63f810c24745b49a0c81a2fe2160204 +size 689519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fa039d23fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:716dacabeb5b553892106262e346e19763208929543f0955811d3bc21224aabf +size 909573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e67bb8124c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ae9591e4997e480aef715a935ed0b2d452ea5b77c2c096965e3d7ec1493d5c +size 865263 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..08b36961bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6493fbaf18abf45b667e850635318b56e13009ec1db297e0c9c9e92e8d310f52 +size 718959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f07380358d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81eeb48eb72cb5216df1c73c9c59019545d23e9751920345acd87c3c58480f98 +size 620333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9c98dc7f30 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135cbd7f6e0cdb24f0f51005c1c6dba537e4c20c2cd6aa522c2eca7e17d2c4b7 +size 678307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5d4e621797 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b3eaa419a339ec268be3921ea8d0aa0275e1a32818fa0d871610350e931a2f +size 582590 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b0900ed802 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762b32f6b24ddbde3dbe6700284036288178b3d2f8eb776b7309b6543127e567 +size 726463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b28cd7aa88 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069abd8fb1b917afb671e2276833ef797333ca6b27db4ba1ab2ba7d7c751c5e5 +size 625963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7802ade828 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b017fd82f2c3c4bf9aee886daa5fccd0f3cb3bbebfbbd83a74e8ea8a7d1a15 +size 723501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ab47f7a053 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ec6659170ebf9ce64a9c481a90e68b69a9d8bd24df28402299bd26c2059f66 +size 643375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..adacf3ec36 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7875534ce18a2878d1cbf77da7a9a069b26b93539f3d18e398a82d33364e10ba +size 793835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..dde3b2e126 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54f4e58701651cca892cd65833c215b6634e6d37d76af1037c3732b1d18f192 +size 694421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9a375f44cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbd442de6b1c5a7dee65d3eda0edf764f1c063aed965b9eaa8d15dc57da9aef +size 844815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..38e2e11489 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b6204dbba3b8b9598c0944788f4dcbe4a6bf66e5f3663555473e2dde05c81d +size 782647 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d55afb73ac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70513875d961bd60392ac9a08bcee418fff432064f779ed1ad27b763267d9428 +size 794151 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5105285105 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9737b29eca461161fdcaa21de54b18189cc58395e624082a466fdeddb47fc9 +size 739777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9f6d0ef85c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae54ce7f1dd90294a736b2442425206c7497c2026356906b72ace7d5171cd51a +size 935617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2797a241b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32dbeaa380b20217ad32339496a05a20d24db872edc3964afbe0a817f9bd8210 +size 870939 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..99658d2e5e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfaa3fb9c3266ffaaa19582957dc889892dc155b8660c2230ffc2d406a7225ed +size 929493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1ca3da7c96 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e7763bc2ab50f940cd17fa07be59b8a16451b5dd5b299476aa1822e43b7111 +size 890075 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..17b87e8f5d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5cef7200169ed42a97627f92bc1ee8f6ffd1f98982d786d931d51704d14ac3 +size 909539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..158b45362a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931166ab595dc3d94715c07a07bb8fcd03a88152beb9d96300cccdbeb08c4116 +size 802971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6d37842097 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76773025c8f18ae9d1967374cadf5ccfd6c4c15972d25008f691d1092ceeedf8 +size 923707 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..03809f4c8a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db87713d4035d6c13622b3392b0b930eccff19ce501416ca538d68d0afa1a35c +size 832187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b2a396bb5f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bde7f6bc5a1d650cf6c146df02ec6897fce6506bcb962bcfaefb1f1998864a4 +size 878517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..74233d5842 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b0a007e401e9451ac7597f7db82fbb03720a50d482eb17d8e5f08bcf17f4427 +size 778509 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22bca1bd29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291c57fdae2b341acda42e8544800388db93f5e93bad03be660f843ba7f20385 +size 887319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22359cb73e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4483ea00cae4f3d380eec6cff313d02f1f1950c0a6919ce38df10ea4110a44c4 +size 824417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b6d4cd51c0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25f7348019136b1cf7373d6e6621549a504c09285e3e916ddafcadc3bccaf65 +size 881937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0216c4938e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb860f7b7392efe0b119a42f68adf0a65d2806a6b70596feedb94e7c60bb7cf9 +size 843257 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7253188e5b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed87595a0d0edccde016ed89bbdd3ed6504b0a301424cf45444686c589346e8 +size 863067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d047ed92b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd385ea2aad1fd59a826d3e10d3481c64e7323438b3ea89b6efdd30b30dc4df +size 759607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8e7b878c85 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38dd1fc87011ed8527199a0863ff258ec6f42dc975b41e0ab4ac807052c4a82c +size 873783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5b9d3e4dba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61119bf5132639c9507187f300ec06874b972b88a3985f195e933198ccdd0f48 +size 781275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..aaf08facab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88fb2e683ada2d03ffcf5df3174ee956d8552a04edf0abcee89a444a63030c95 +size 828591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e08a9dad79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d61c7ae5f722e6002eb3c1f3ea5d74756553010655cadb49ac65cb38839c615 +size 730113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8aea5c87c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3596f9a119c8b0c34688a6f772212f3052abe903a5ae07b59d5f07fad7370f09 +size 939845 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..43076df69e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7145b229da2a8e257951dc8935ea628f19a04f90150a094cd9e1aba0f71e6e +size 892483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fd8faa501c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edd5d118a1b7fa74fb681e21765f46e81315430f1693de062606eec89435e38 +size 932685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cdea3bbe56 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d00418026b39acba0e771ccee5ade1c32b717a69ed6e6df4a0364c5e18c9684 +size 893117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..685c9d7ce7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfb7396ad922887174417db5a0c276af71910d290a6d1ab931a9a7bb965e51e +size 911891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cddd30fb41 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8b418180f4756dc959dfd05a06fe5d0434f8267bbc8f8e86f8e136baaecfd8 +size 849675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ae6a003475 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e50273c0b3f9fda9ee0071065502713bc23edc9817f151c4e12b5722c875612 +size 895375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..850f8b025a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff077e130594fb04fff07a7e5cbb7aca36973c049d11a650be94fbc2d01c151e +size 788609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1ec67ed51e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a21133ffa37141e5a6e70903fb9561fed1ed90d9863b3176abf475d56ac3b91b +size 848507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..26d1b43e1e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcf201e92f790edca36045dae09270a57088ea7487f97fc6de816e61d9da63f +size 746773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..70af4c3172 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc85a3b866611fc1d348810822003e262646a51579cf0ff7d5e649160b72dcf +size 890905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..77192c9bc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911d31d5a92cf57625e40c6734a4f68291a5e5e2b855632fcacbbc8e17427795 +size 843543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a64ebdf6fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305c9c73fe4351a92e59991bc8679608a7ec16eb3f9a0b05e3e39d1b4647704c +size 885571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0a66cedc8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877f95b1ee05c842e5e48d9a7912a9de03a9c2232f365859f1ec50b3ef96613d +size 845955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..05753976cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb756d81e1908bb70f3c971cf1a9bd8ef1704665cf0bfc54edeefb745644ee9f +size 863447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d7311ab47f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b413e5a55bdbea6dc52cf6c8c583b882f349031ffc18b1347fc9bf403428cd2 +size 807889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..67014fec4b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302902ef004f6edbd79dbe65eb5b33d0e962bddce78abe1db098b8dc51f3c685 +size 849841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e86cee9456 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b04ade614f6b30c94821abb76f4514a047d104cc9d1f091dc4f54bbfd0df61 +size 745641 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1b3c7aa28a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9cb5f9253ae1e19f7a6d9b310b4e30fb65b22ecdc1482f16535de45a328f834 +size 803021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f0178a023e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e340e2939c79e89f6a8fce18fe64d665e0b5efeee0e4d524f4565cb88367b5 +size 703803 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bf21704a28 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df3917ddb1602d581712250dc45992898fa2655d55a7518cca9a36198518d23 +size 745751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e7bbbf7454 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb3cb981a04413660b0fd8a97e7390660408dd53da866fc2fd926788d120c28 +size 639133 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9c621842a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc318b1af700e5f544d3268e275922a9e20c9ee0cde866edcd086d706e5ea32 +size 739137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e7bc4daae7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe86ab1bc34fa6038a13f3d9ae21c1037b05b5527d31917286d3dca20d47384d +size 638045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6bf902573d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d47f56520bfc09cbbc72dd65f95dfb0aee7a06dc3c49eae0139f819d520cb0 +size 813763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bc6ecf9d0c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d476211c824f7a8026a1e327f1f7e94e83579a8722f5e645f25afaa72bfd15 +size 707985 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..0ff68af11b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a171588528a45aa10ca0590505981dbdc34852c23ab669a0c743c7eab837935f +size 759023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a192ced7d5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e46eaab4917a15cfc54802bd61cd4ee201d7214459407591d78080a7790679 +size 713431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4d1dd09329 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232025d9744e32e9578cd6a503e3dec6e41148cfdae02abfacd1ce59f589a4eb +size 749305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bdaaa3b6fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d47645bb91e820501f3f0671494448bf0d21464e2c49c41024998ba9082f3d +size 703367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6b734f14ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd3711e0913c1d4b6111b4738f13a0d51f6f95a11b430d72fbffc5921a635e3 +size 824817 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7f9327a594 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873f5fb7bc7c9b8332830b71fb85f96bc3cd414df23ca146233d4407b5b3cfb1 +size 815437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..16404b2fc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ebdf6a2f01bdcfffac6359095e4ff6c3ec3a9222bf4d57c36ac31fbab475807 +size 795493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b976aa4f0e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76ae40a293fb5753064856bfd83a47b2f309db9472bc0ed6bb2f9ef32649cc3 +size 685323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..406c12bfaa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56679bb8071304ce5f21af422533b270d5b74eb887a38060913ac88947ee42cb +size 813913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..463b923dec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1766e301aa40062c7df29c275262a31423dff35e7ac6b5ac2fc288fe95fd77b7 +size 804535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f9b575e825 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bcca12150ce4d0762bd799cd0519a5d025ab1ee7942a4247e08336914545093 +size 785725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b5a5694b4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa80781dbcad0e65945368544f2b2df98800b2753ee9a7993219fc36fdd8aab +size 675259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0d995462c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fdae32c75e4063dca421cc778bf4b98608ca1c9eb2382fbbffc1e13b6dcebe +size 680019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..832cee414d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952c4a2b99875b5aa87d11a1cf66a51839ad0241a0fa8b78d3357314ebe7cdb9 +size 653871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..55f8993e44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776362aca976253de90bd06a91c227aada43a478531e9335606d3b9f4e669152 +size 697477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c2bb7619d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d52904d31723338e59c87eb5c64d42484968a75c4e57fe11a8a4a4a179435f +size 671427 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f958360d3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d61f689b577b352281f247d9c2d752dcffb874aefde51d2031ed1b1d9756ff +size 812301 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7cb0f2c8a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3279f3b16b18fb11a8254e6d403f27405d5a7f741da249f98bc0bd32adbe9528 +size 712393 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..398d700f4e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1af5c0516cdef49293a87c59e11d73609863e1c68324ac779541a4d0d2bc75 +size 720797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6122d1413f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cd05f78167b81e48d71e74cead869241da05b53fa27735e5dbe3ed892abc35 +size 599280 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e28acdfe4e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0042ccd23b602bc91015e643e16b2bd7a7ebcbaa4d18802ce4a1c33dcd7fa530 +size 688235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fce00e0602 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f94c6be29856c3bf92e490f59cc2194dfff6537896473b1d8401415d36715a +size 571554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..681fe72bac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87979614bebc8b76d3c635d30140ae8187009c708f534155bf30e1563b8e469a +size 669955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..860ec1f6b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4446bb18e05cbad2b8c6562edec6519a498f89ed751e058307f146bf91d138 +size 643807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..042ef74f29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaaa30d8f888249366406ce52c1f4e015896bc0c20ba33f442784d589ee3b6d1 +size 686623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..41e88f44df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fafbc4eb52a6847f1f56294723a6a4d999ebbde837c7b27b48d70d5f17d0247 +size 661363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ee5608f5af --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c829d6745bc95064e9be671d6e6e93f25f56f0b8baf9769e7ab99f9812271bf4 +size 802583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..37098698f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb187b9e3c5a4e5e271941d960b6ba7b5e0a23d6fa31f42ec0e58c3064f410b +size 702279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..58ce658ce2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cae99deef586ddcede37d4307ccd01e555922a24f4d272065e6da115495d869 +size 711079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a83cf6e5f0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a133894f0276d59b2bc00d10c17704d8b73a1fc1b72290b7e1683ab07d5c03e4 +size 589216 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d633fd792 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f332a67eab36b43d8cae12336a530357915099e051b761bfcc0ef523061094f5 +size 678517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f60a24671b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba08a9d69d0a99227b76e9f0bc5391ee3f25e9d2760242af14d30b6d9295d7f +size 560700 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2a47bb4597 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d600d51a57480a51dd914675368ac7b299391721302591ff2329c25303d5670 +size 696087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ec386afc40 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a4840137a1a1b396c4f50ac68bf7cadc8360d10f2483187d4d68a67d044025 +size 666681 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..12f4cd0951 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12e3957a11f1247253754bfbc4db7d2966f045ce636596c86c4eae4d6a2e054 +size 700273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7aebac536e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e21d66fab6cefb853d7ec2e75d2259747cf6e873d86ff8d56fe9c1fbe959c0 +size 672547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c7debac771 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78cb539b2e3ec98d9c35b17246f583f4356e090749892f74500876498caa4e24 +size 827381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..31cb2ed034 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990cf10755ab9af8e87f45a110c34a30f7d71a9b25882f2e61d18a7ffba2d049 +size 784799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..24331122ff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6931641d7936c7f74516bac369859d360f2b0f7e83effc565cda2ee4b46693 +size 694833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..86ec03f298 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28528e06821297a1b55a53e5ab8a6aa2557ba89e1fb74080898f5b1c2270a0b8 +size 605186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b21613ff3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6282720300b3036811bab5968784ea84c8489dc5e6667bb9dbf28812c6e6a66a +size 661531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b366677a81 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b7ed7d4e3c81298df5860278c60b91df874a19835254efe494179a0540a011 +size 574054 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..91a6c2c723 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db55724c8c2df77b5c40f6f3908183d20490c2155e335fdc972c62ba8a6d6f4b +size 686023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cd205a735b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b69ceff0242dc0263ee40e94afdde3ba0950ff5c9e28e18f7acf3a8460e9aef5 +size 656617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b80bfa3566 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cded4dbe0898b289e904c7ec783265f8e0612e568c4b737a90e77f9fb576079 +size 690209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..92f97e317d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecdb9b30b458cfc6aaa1f6622febc94a04be9154dcbd4560e9ae88349e10a90 +size 662483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..af1b23d1fc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8114852253f28bb37c1e1e229eda239b0ab5e5c07d80c5f92db7e67b326b984d +size 817613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2a3eac426f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc186af5635c8263f8c595764e8d978fff7b095778bfe3b76b0d3c9c697fe5bd +size 774785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fb994e66da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63971c9af802f21badf72978ba3340875ff3bf53836cbc8231d37cc84e71ba28 +size 685115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..21ce19f022 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4200f3173f0746764cf57db6fcd4819a7f8c01fb3cb85e416ffa3100ea10fefc +size 594282 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..15adf2b363 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea5a151caed9dbc6bba9423d1d3a0cf1b21dd9f6b8ec71383e488969258952d +size 651763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..86f11aeb65 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:945619733c1b3f380a8e98a12d72d349e0576f32527edb92b8258834b08c47a3 +size 563940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b643584c5e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7077f65fae68083f18254c837d5da3ae0e8c792540ec8644bc2dc204a0040217 +size 829239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d4d384f97d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0c93ebdd333f52811498c15e201beb9cd907bd659a5e1042cc0de20a07d61a +size 738111 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bc44203e1b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3233015bf7bab35cfbb0b8521b4611055974d70f7b039548911e9534a574ba4 +size 828003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a4938a4899 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4aaba2994b033e829ac69593074367fa58bdd0e7b0703b7ea30cb5e861d08b +size 761789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3d1508550e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63b0cb04e9a62488d2e1d8d2263acee2d9631eef26dca53a4152fae7c60ce6f +size 894261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ec0749853a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fca0164a86f3e2ca18d4a5d052b00205d908c0c3794388b458678aa1ffbbe9b +size 849459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..531f3908fa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:defc5765d45756bb4361ebed787d3e06e4a85e1d89f3520037de1bcb2589cf87 +size 864661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0021c155e9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ce67e09398e1318b3d7b0832ccf5fd4435ab69c85443f975ff19e090d52415 +size 818477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7cdec74ce5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45b15a3fffb336194bb453ed12177bf1b1079266cf9c1d88478f2ab326f4659 +size 966829 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3252888208 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13cf8c23404608f132c3c08da2400cdbe3088543a1340a1be11ab8c99bb6dd23 +size 876739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a944b1b6b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7c012fa89244b47c68e79d5223b8919ab87a77b882a104465d741f989457ab +size 932247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cf79feca30 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f801a5e358aa15302c136c07c212353bd207951260fbcd59117f211130716796 +size 837617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a56741d49e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a98e4a08b337b3bdec0189fc76c12ada489afc83c07abca0f238b85da477275 +size 830031 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..70644bcd2c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d552c5ddf82817bba1e4702323c5c3ddc177bec39791dee3f542c6c48426e5 +size 738115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3eac2afddd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9621eb617254cf16382748f2405b5264456dda03af744ae9e4911d29ea41978c +size 828797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b17a91ab9f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f070006708f600a2e7f85f7f9cc817f9b903ead386d32261b4d4511dbf5c134 +size 762583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..57d3060a7f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:259c04873c1af886fd64fa46f85dd0ad1aa0a7655af09921ec06c70c3dc43cdd +size 1084147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a0bf490844 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2454c54b8a95dce6a1d34fa89399b6a58197562bc5dbb574263a2c4e8960451e +size 976253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b9a6f96165 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678708b5f8a7f55f182eafab54d3134dc0df2c27c021cef2e6d11e0257387357 +size 959727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..578e3d3bab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9487b4b486b6b0d304e43ee03c2d6b20b939b9ebaee6af7a0dcc67656ef85564 +size 1032493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7bf7fbbccb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24fedeaac2aa30ccf8fc82bcbcd7bb92d2bdb64821317e682d3a9d012cbbfb7d +size 928051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ea4afee6ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e88f526c81dbf7c3ca80ee5968a76e0c14e0f1e9b46fad65508254608855f05 +size 1091689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..acf75b1d5e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442a2a499d3e2f1f449c14fbfdc268787f8c79bb90e8ebce64af217fa69faf27 +size 983795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..385e67a258 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9c3a5448f29707b93555806745531ad5ab50e27d6c60ab6c672c65e5b560d5 +size 920353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c872de79f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5db7c33b8fbe95c25441ece432e9288501f424f515424347e838a29b762fa6 +size 1045905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0bad2bec72 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999754ecba0692fefac5d0369195adad87d3e84b13c913cca8f7b2bd40a81574 +size 941513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5d78c1c19d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dbfa96389d710827093a60889da42bf2cb05d4fa3dac4f7f62ff1dba950ff9b +size 1084867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f69e79c29e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59e56410fc3ebd71b7b406a14111136f1fdadf27df8d8b777d7454fd5d90115 +size 986487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a685ff966f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a801a0f1e822a89609b0626dbb6bd0b8ff5293adaa53a086e56acbb8c36319 +size 985853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9d0b4b23dc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421a74bdea64f33142dbdb56c993727232eece92c84e247bf5bf116b0eaf3f1e +size 876719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..645b43dc4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e8384025670be4930507b346bab02006de94ce8d00cd5dfadfb6583bfa5d6e +size 869675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f578245650 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d573a3050fbec8ed39db72297a971b5917949247d200e4e06044588c573317f +size 809825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..aa2848f15c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf1e53dd4c2b25b9dbad4facf6a98e289b5bfd745d28518670307f995074ffa +size 1044115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f7f6441079 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb6d304b958a5cca817690d4b09918ac4e629e1d230953891d454a9c76904d2 +size 978297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..08cf450edd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4825758bf8f45ed406993b99d8cbba20479e54d28b6c98b5db9173c552ba8c2 +size 948653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b9094671cc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125cb3b05c0a4f58f34ad2b9e0d24c23e2de98a4954cd72d1a3f3d1042cfc6c4 +size 840309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4db62e3f15 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47dae5688a0fbd19e237d70f97b609471fc0462049604ce321b18a3c694953c6 +size 1010935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0b0d1f2bf9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5188c22944ba5624f16eb59f4088c4ece14d1c30fc89d6f4af7e12f25c043686 +size 943149 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ead7e26279 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb374842ad8018e2c720c0586e14b92c51974ec16d2bb031b40721f43406a8b9 +size 867771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..717a5be0b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d412fd36d4c9a598394d332f52b70f0addc82e4a75f7cae52f9c6a5909b40e +size 960121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c2a0bca462 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984b77f2fa3f773a2bff0e11c412f9ada400978e8568d9ec4fd4bb3d2c12fe40 +size 894899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b30bd59d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a8e8619c19a5942264e540153f93958bc58b15441f39d70913b9075561d391 +size 1020105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..62b8220233 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ee9218322b1b9a9424e93c507a890ba1a86971ec124144f789cf3aaa749049 +size 950691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..36b843663c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d745388b08759c61a3609e6718effe329f253d4dea1ddf796740a744438148 +size 828397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2cb56e70af --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56faa8817953e2f292fbcc7ae68ce6e11698b8d86ff8601d8910c7f0239f79d +size 972695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cf19920665 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb642b5fd390ae10200a3628cccf084dedd38c727db251b680480c904e305a21 +size 909201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3c9aa65ba1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37ae4c6d0833648d0848c39a96b6558733b6b473c0659765b83892a5fb1fca3 +size 1002431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f29526a104 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189eb0f83cc528bca55d33dff23d07eced663e70f2b32c377f12cd640b34940f +size 914165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0ad427bc94 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c62e12f8a0f1a71dc0bf5e970338a0890c5c817f4732335debad2891c4a61ed +size 949789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a1712f96b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9048d287bee161835e6febd1a92590e13dfa6183922eebaf1f7aa0f7a07d40f3 +size 845293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..271ad6caeb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6dcf6ea3b972660fa8450408d05832686658950915ca31c7dbbd82f574d5fa +size 785511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bdaba52c96 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473590cf5b3a4dc35c1e2e77880d334617422a4a08abd9198314e214146506d4 +size 719891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e8293ba77d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8966b5df88e998f5cb626d4ae83f72cb6ec476ac9e5dc82363a42fc230d8b3 +size 960841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6ab7a7f944 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537a176cc1bc4d1b6213b5d9315962fe8c5160e2ce6fde2ec8f9503098ae1be3 +size 890285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6de612e237 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12091aa48d8133d4b0949efc342ce973581bd0616cd1d07e14d6be794f7e9474 +size 912491 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c9e45682ab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d57a10d7b5a3a49beae490b1440f2ee3c58406fbe0d795988b5da4fd6b4378d +size 808095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..224ea61642 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646a7cde6516db5de9e2e55529bed32efa0ae03524dc6c03e4a42badc54b0e9f +size 1191447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ec34b63f00 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499b96b74e478be0262cf48d27fac8a0ff2f566abc8ff8886d57e7d5391e770d +size 1041717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e6c38a0486 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434644d4eabaca60d963aeb245ee32b2a36b351970f0f0a828b3cf59f0ac2a45 +size 1131219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5b7d8ea086 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d19e4b0c53fc50911de04810087343fcea90d166bb4b44fc88d48af5b94940 +size 1067323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b90bc006b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb720949c943d779fe13c0f464fb8275777cf645fe829c17b2230f9838b478d +size 1144873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7e4c7b54d8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8c2d249ff58bd552da990fa779e64ccdc1be5feed9334c67fb5234e88ed5f4 +size 998599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ac60c74b08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524924e7da62ba12adc8030a2a464b927ae43a7e19832d6ec03bba2067239707 +size 1194845 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2215ad7bb5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d073698b5df30520938022bc15448918e27fc09fe20c287bfd0d737d5f26ad19 +size 1044327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d2e2fc2b43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506d16a40458121543b54aa188331cf71ebacd5f7b7a10fa5195901aa1cbf507 +size 1040489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..786b8a4867 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ee1905b9c6fe1fc2a59e2acab19da59d0ad5f51db70f7814e64a7af7c72f30 +size 1006391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d3c6f06476 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53e7d6369686076c8dcb8870e00c1d4894b695054c5aace37c1c85f431b81d8a +size 1153649 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9b19f6d95d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c759ff851d826deecfbfe86427c2758c9bc86f870f0792b46b1a060e6dd1efb +size 1007423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b1aaed206b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921db18bef03aa585441336e847a6c05d9954c62e4ab87dd03aac44102f4a625 +size 1177021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..90c93686db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82134f1454f71a880a908ff2d66f015958c52de51158d66f885baa06ba3dafa +size 1086831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..254946bf3e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa034754a87739012582d9a6839049217b8fa477a5265f18649204643866517 +size 1045249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b6cd863071 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0abf84fd76119cbd0338862215fe60c16643b25da634733569d09ba717d2c0e +size 938041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d894c98c87 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc2d6743a4cd3460565d1e6a65eda3edfed7553619a49e9bb73d19a312ae72a +size 1043089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..82406409d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63b2a8114ca1d3b74a58a86d9cf809cd75807a48f962e64e1db95ef9d36906d +size 957439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2b971bf237 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4870a6989179f3a76689b7640434e0e33960ff539a17b5a06e2d82687d40ce36 +size 932623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6f57824b36 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5a0c911c822eda78ff155a90139c535a208741bbe1b5de931a5e3909aa09c1 +size 896701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..afa99990b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f54d9c287e573e46ad4e26a7b77ab7884ac8d31b88e8efada19ed8b4a4154a +size 1131731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..736cb0a616 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcb390ac4cd806a6909496c2aab1610ccf536495fd65e93bf4aba30759cc59b +size 1075729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5b30789b5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712375b717be4d6dfaafc07c25af4aad8dc03a89f09e273007f876fc1c779fa4 +size 1003413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..706e81b49a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31dd2abe1589c5ebf31fb1b985f7ce8441985a8a05d431d7a998c95e8263896 +size 898423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4123be906d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062b09948bc785509a8e8a52ab00e2c77a122e1498d432345664ee517465c1d9 +size 1096775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b8910a16c8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37be854645b7aa83443ac52082061f9d20f55503cc2cc27eb9baf579ab861e09 +size 1002745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ae61412715 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3da6c7f9e57f8bdcba22963be5a203bddc46c28f0c01a9819504e86107aeb5 +size 1070143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1d6b356836 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2937783ff8eff35d5f4deacb3e86b53e25bd5e85946d90ca4fc9c8a2fc76a89 +size 957459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..18a4fe14bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440edf1a96f80b023ecfab8c1c8a4cd55198e47f9c20d1e3e1a977467766e886 +size 1050203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..15892c26e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd908fe8db9587cfa290c50a3b1a0fcd73ac1cb09acbcfe346336dacf39f297 +size 960415 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2fdddc0ea0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60ff1402c9bca855ed1d1bae3afac74e8d61482f2f0ecea6f2b8d39049ce0f5 +size 1100173 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..32e4162279 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201ecda841ebf453a8c5e1d21ad80ce3e84141f067a0118df9e034b6681d8a1e +size 1006143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b33d06b5ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fbe922001b1fa67ee47a6a6d8ab06f89282a41eaaf1c69af89faa64f1e25dc +size 978575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6e01c720ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023057e6e7408c96d356932b7d773f7456f65738b7c011a72ef9e324bfcb281d +size 896525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0f21273fd4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a61286d4bd1037607dc5411fe14b7e8108149540d15136588432785bc5f7e6e +size 1058979 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a302907fd4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d76f64ef904e848f2a1f4426cf653a9a22dbcde95b43bf102ae498f7727722 +size 969239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d7f795900e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb2d17edf8a581177eaa4b2552eac84e30ec2ad2040e3593b7424e6587c8422 +size 1077663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..408166d3ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0817bf03b8d964328063f875a2480394ffb98e2a63d41749b3c2c4dfa01d15 +size 995023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5780ab53b0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608485c4f301eac1203df305507e4b89e9dd9907010c73d11448795dc6d333af +size 1007855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d9f005bbc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d841aafe1798c1daa919b7a19894be2396ad9a86d136d688ac088baf11bd3b99 +size 899955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..61de4fc500 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82545c2e5c041571f3850d7f4497f523f91363bea46ee055aede7d8c5845c7c +size 984333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0f025a402c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb6aa60b6b8444641200baef1cfb204ead4c2138ff83f746b8449ce907bea51 +size 858377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c22b1ae7de --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324fb224b266ae396c6a1747c2d03f807954e901c72e957f98bccad09064bc9f +size 870711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d3a98eddcc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42409e9cac17df172fda9bffb2b7691a21e87e9d454a2ed58582fd8102c07b02 +size 788019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..16f7671ff6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2275f454e7cd6e150beaea8b5140911cd7cbd40dbcea8d905151c1740eea3441 +size 1032275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1c46ff2e4d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9933b3895abed43cb8e44145baaf074c7016c692490865b2ebf6a6333f7832 +size 968725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7a70ecd241 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9011ecd93a5c3d0286a98222afe38fcc4cf144e4637395631188eccfc818c71 +size 966019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e2c8eda376 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef73835e2dd75626bd9d7fddc61e8f9584d99a7c86cc7dbd1ee4c004d97b94d +size 860339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3ebdb012b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c901fe72adb7970e7cc59dda953a17a1b3f60d42e67655504e14fe823ae8c472 +size 1424891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ded1c2a48b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8b1f03219b0ef8801d679dd6c8453ef955da1cb5df6dea682da7ca8fc012d6 +size 1172895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..dfd80314f4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7e3ceeb1669676ef57cd00b60d23504edc3268cab26c6b111d2de1ea97f080 +size 1277779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8360d9113e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a718e777b86662c1842cce7b71ba7fa449c20fa289b3001183f9167591868f +size 1363815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..39bf306205 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b63fcdadf93b0e26cc3267ddd95fec10a4af483b3597251b7ddc5e4caa04dc +size 1116751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6d2a85bfc0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1711994ec0e17895a4a18903e632b4582e340186d6b154d94d4ff1582747556 +size 1411813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..357dddbd4a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd72ee94165618db97eb10e27bd617f34eafb41235f54e501388a22cf5755c1 +size 1159767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bbf4622a0d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb161913224d2690519818f555572efb308012e1e08b03d3b9fb46171986eaed +size 1164701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8aba1d02fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b17d6c5d1ce7748dc578545b335d734bd0fb61f8f518edc2836f37e2f3b7a70 +size 1361391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c9dbb964e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503fb167884e35d969f5f51b9f48e0b08f702ddb6ecc7e6d478af8f1cc589ef0 +size 1113539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..377b5de4bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ea8caf44fdabec79eca8aa1a4fe3ea396b1fccc1b8ada29d864385bc45d0b3 +size 1344409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8174b159d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ea93c839b830ac9c29e2501e366034fa4a6387a8bb317aee415c5ed2913401 +size 1314851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5ed49b5eec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b771212da3cba76131449888c6f765f480b9bdc33f7abd1f8703acf134e9e827 +size 1168089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d2a2b8536e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5291e4e7c12d02427595f21f6c5448d29f8e8b4f272c6e93c6cac964701af33e +size 1061621 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d3dfda0d6c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbd563f0d9f4ab2ce0615d686929fbfe3cf78f4778a11c345494d5e7a3b1f0c +size 1105537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..037249ff75 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd15a8ffc8605efc3d81c7f2ddebf579e85643e6c040a20efadbfddf8f035ee2 +size 1070107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4e62470925 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56139139b9122157623b9dce9913414c4f619b20b1bf919338d1860b815d4c80 +size 1289351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..069fdd3e43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f62241803622443ea3f8dcd526c7c6de48fd78be7944c2befae5e99ce94f372 +size 1264725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed5f37a4d1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1761da7d4210f86c11a34adcfd3c030042ebc73b688d665cf33050835be10c1 +size 1116633 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..35b0b0b378 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1cd5f49e210573d6622400b6ca4830864518707e75cc849245cdee13ed40814 +size 1013321 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..98efa4de59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f03079abcf12b75f66ebd80ae82cfc074d9fae41fefbf846dc86f2062b27740 +size 1287251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e658568b37 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fdb7707ac3595af57360987d33dd7fd925b9e5dc4a4baae0ba473da98b236f +size 1122181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..acc70f0c55 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09591967c155dc9c7dbe8035dd61d174183d6524ae99ba043f512ffe64f47b00 +size 1138463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3eaf65062f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95866c8bf7175028f02007e9f02d9eb450ccfca3c36e7e7be20245fe41c2e8a +size 1226125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e55fcb76e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699238ef3f8d65a86d25d9f1b42a1531dfdb671ddd366655be8856ad4198c668 +size 1066087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6d1f37bc8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca9e7750d8abf6a7ee2aba4e239c345afb11546ad6c5ff284afa9d02ea95288 +size 1274913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..854684ea19 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fe00057edf7fcffbe9e7b3e90e4fbfe298702ddbc33ee978cc228fb8a9a13f +size 1109101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2b19cbf58b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c691f141f5b1aa2323dd70d5a37116c84c28b58c827c994e6125af5a91ea3c5 +size 1026173 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3b78fe3eda --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90624d17e916c46204f6dc973a94654ca1d6d095d8b588f30fee05e264d59bed +size 1223703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3c62c6f1f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2757e18bb9592beeef2cc5f783f3b5eb02adf24c3bce1d715a76d5d7bf692e1e +size 1062873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fd4cd4641d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdc0d82330ef2e66930cc380e8eff41898e297faed5f0e4b703bab37be4cd0b1 +size 1229955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..72ed11e3f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da68ad038bc768e1513dfb3fa0dd0cefb1d1208c47d9da0fbdcb16ff6ad309a +size 1177951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b630951a96 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83709533305ba6cb64e201bd52746a1c418b7831538096bc2a6cbbd0bab3a0be +size 1119349 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e63e1f1e52 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ada68225ff048faba4ad2c5cf8f7f7d6b652740f80ac67d7bc404b551d91764 +size 1010857 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..25328a264b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1ca5838c8854530fe59fb28c9c8d5492010641ec507d0a9679f74d8f42634c +size 985953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..44e318f8d2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef52af18e72083d75366ac6637ad1f947e3799b55088348d24ecbbfff9706f1 +size 930643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d9c90b15da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0643a76e6ba5a41e2c877a7117aaf338409d6e5589f496246dc4a5396c1429f +size 1174997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..70aaf60b77 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75bcf9ee13035b9aac0360497ab9e5188b7be5e4d8762b6860fd8fad91827a7 +size 1126247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..52263a7d47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f8df21d426b696562963d15d7d44137175e0455ac22778652abf57a8f856b7 +size 1068483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..74131311b5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d97004858ca98ec810d8f45e74ec5e76f04614dc39d888509b91f9a5e03d4a0 +size 962557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3dd06af00e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9706eb5158982ddcecbc5daf71b414ef4057a34e125e05cc9df038938e336036 +size 870409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f03ff95b76 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36251a8cf023745d0e78d2f765d02d21d63471774be3ee100a99104a7732cb31 +size 769515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..98a4d9a978 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db8a75a287d565e602e205bc735112a1ec8d49c3ddc8bbe11b0b8f192f11da2 +size 871345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2f16afd194 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad587f543e0c3bcbbbb5142854d827bad14b0d1a9278d6a62b06e7e42366aaa0 +size 768773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..79f8409615 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:760bdcffcf902d48906353b28ad8405c27b4f0914b795801f53679e41001309b +size 936795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fe78e04c48 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44ca7c308d972379d6fa735e5484284ebbadd37ba52399c2d3b5f7ee9eab181 +size 835209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ef416ac034 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96268f855f2ced7a4a0d16dab090739eeff7bdde8e63540634c7dd9a80f280e +size 880575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0625b4a769 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964843e03709d41a09cb73b9e6c48c9b3ec541e31b97468bc26fbf5a8787505d +size 843517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d66299d071 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668e43d8febec7e3e37ea8f41a26d63601314472c2ffa16a2b20ba124040a983 +size 871645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..181adbb93a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b4351618f3a98316db9a72fcf2849ee669362bcae11fc9dafcc821859fe5e5 +size 833503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fa49642ec5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540b5c6eccc73e173e4f9117a4cd50c0b6332c590876dc1ca34145316baf02de +size 979223 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..baf092b083 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd093d1db7d215668998a255fb022cedb14429c09d31281888f91c45297d332b +size 964121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..dbf983a82c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d0cb48651ad83ff284fd94fb377b54db5933a510cb0ec887eac3864378d857 +size 923605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2ee6eb8a0c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114b9a064aae75987d5824047286c42b96ff510a4b3f1de1901de58e17eddaa2 +size 807959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0f471c5066 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a60ddc905d1d28419a4b15953450edba39f7384f5ad118dc506f5bf9afbf4d +size 969159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cf14c548df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53f7b1076f7d944f9983d6b4685618dcb39c841e10abf997746c3f496e166473 +size 954057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0940109026 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:904eae96835defd064e9f6ae70eca50e92e0c55f693c5d105e48a8b120ab7dc1 +size 913887 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c063ca2879 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a34e5d6e3092ce685d36d57f8938b41e78995c85d764cc5884d3ca3a48133f8 +size 797107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c39e183a35 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2dc482b1ab5c8cb67ab718deac5a0297cd111759de209b7e26aef2104d86e7b +size 734181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2d6b9589aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2129ce4b7eb131bb366d18bd7cac4ffc8bb92131658841c33efb94e1028404 +size 657021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0a9efeab0d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad09298709fe9626e23dd2e956444db0488bc81f6f7a5a892263729166d68827 +size 735309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..00b309bdc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5dd2c97784759cb69f7f8e399fd4701101e9f740d811832e53541ae3039d80 +size 674727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bbbc408acb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b887b545bccc4660d904a557d352a0c7fa79d82104d62415a861756f06e67076 +size 953437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9718f2e0e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b067300fb3fd58380fcefd33cc374d783df7cbfcb0f39568392ea3de4482b3 +size 843909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1fc78d19ac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087cc1071cf0d1db59ea886041ae834466c06c5007ccc22564948e4df1b6c619 +size 746643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..76430e91c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7008d7c9aff37d815194df73d14ac9ffdb64bda77167dc18f0cb213d115a3ad1 +size 625867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..99a8e53fc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5bb3220fb9269c6be9ace6b6aefb68ecfe08c2734ced8f2990538524fc5790e +size 690893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..48906ebd16 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:590dcb81ed98bfd86953983f7154f3bcf513522a0a3e0a733e58c5f5c4976d73 +size 574310 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..148666af33 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac02006666735e177bacbf22db2eb8ea3c128ed2b51439c084f4d267eddedb8 +size 724117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..41d92e55e4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230e51f41df9f2b86bb18431d93b5ce0679dd71f4b0c7763a1cf458e35c5b780 +size 646957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3697a047c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa161c12da45c1a14684df7e3e83778ee6042a350dbd782df496f1d23ebf134 +size 724457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..671a1eb1ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d3e828e3eaca117cd8829e4ec344acf078b5b2bffbb178ca174505b9e9e808 +size 663823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..841fe27fc3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabc9cfc91d725ca630dcc63589025e70e0887e1d27b6cd4adc306428577f157 +size 943719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..804f8f6b15 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8bf41f8bb4fb395f4af60ff4d2ea738835f55a27413df13184f2a781542869 +size 833797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..44aa8d6986 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ebbb48b3e4e875863232d10310fe5e6c8eb331c1b222e5ef1efaeaee183705 +size 736923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3cc71c200d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3e7d7f7a6a7e5c298414aae2c660c2f71dda960f805029025561a9829f636e +size 615802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..78ed42040a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e7dd131ada4bc55a8fc781a1eaca892d2ae547b6eaf6ef670cb15b91dca3ba +size 681175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f60bf9da24 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5fb3a08c0f96e989337b501d4401dd0116659cecc8942c6f8c7bdd67ad1d805 +size 564246 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..00b9e472d1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed32214f74499243a0ddd6e6f700c8277dd78b638300d9642f01c02860a4094 +size 738359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..043ae0863d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:058735cea8f84567b63d454fc8e3afd3da6c81b796c41d1e3f2faf40a9e22fa9 +size 670523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7b851e3c5d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e055382853bfbe31185288126efb6548627a59413a0f57009ea4868d642f9b +size 738699 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b5f71e9134 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc99653ac62a7515c06b4e82f8c39736c8aa470cced2ff4c6282c1e7142296cc +size 676585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..65e12dec59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d6774c689bd04726a68b2051c51aca7357e8728c5ae684c95e596f4e47f5ea +size 966003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..70c2276cdf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11db248b0ba1e39250e133660e96bcbb74159a499c38b87920d60f513fe0b2ba +size 928255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..56511eefd2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481f8679553db2d90407af1e234f5dceb91017bc179ea358981be7b94be0d16b +size 721467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..245489ee73 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3812378ad2bfd4a4c621dec90d9cb82dda4a4c5a75ff5e7c8678763dd13d11d3 +size 630341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..83e0a6acbf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333f77739ff9896de636a1eb001d6dd04c30a1cae35132587a9017756083f3a2 +size 663251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f8e6971a47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e88790f7ddfa13a1e10bb47698739ca9b400512bc5bf75c5080a0e3c0566cb0 +size 577550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7b9212b804 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c57c71b1d9a83557843f3e16c82686f8188607ed2700a9cc9451dcd2d3b6c6 +size 728295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b0458e5c37 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c8bc1125f986da9fb1992be9aa3a1f909d8dd852b89f286ef261ad76d4b13a +size 660459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4c7cac4ab8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713cafec022a7b8cc55acd0c39fcb3f8126bdb3d10906b936dd2b7b69df94e93 +size 728635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..604236cbaa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01d5fa055f2b701ac1dab962c0b3647ffae75db06cbb497031a590bcad99876 +size 666521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..638bd89d62 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695ec4cfa43b6449d275ae5a7fd6f480f8e2ca90b9b45752132393adb44e43ad +size 956283 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4702b471a3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742335afde7a9b786a83f206dbcf8e96ea499e39c1e93284ebe64529e96ab204 +size 918239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c21b77fd21 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35dbf6f6f9b00865b1ddd8d741aaa42f88af0b7afccd82419db49a6d9290f259 +size 711749 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..57d60aaa89 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2a7ece451087789884c997e566cba6f91dd90cd7284d6d7d4bf286d8035afd +size 620277 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7fef70f1b0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d13fc343b7d4d4cf81204c44fb0c7796d10544b23f2a6225f6f446740c329b4 +size 653533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5312b0c76a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191579188214c353e080328c108d35b473c02779ec6e17017f05a65c9e799002 +size 566698 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..69311738b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af01ee24ac40356050271e9fcb4a840f3dbbfd21d67dacd42c921f71b96ee04f +size 825813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ede925c403 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794863796e7a495ed00bc8849de22d7449781f8590334654203d6d5d5db1695a +size 731677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b55f417149 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddfc77587f34faf6aed2cb39a28a4c8e155cd6fd72b6c71056d8e6dbfd4202d +size 823245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e1eb229f8b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733525ad7ba5ef91dbcff2c3586215b4b76d3700840bb64ce56fc99e19dc5db9 +size 737791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..26680f8321 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c92f6fc170e92e402e096e95fc7d0e57c1138e304da9fac00e0261c5735bfc1 +size 893185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6b39946981 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0333b4e6976ab113d6de503530f59d6542956a048e394281b1004dea32b65ba6 +size 798457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b823525606 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4630a88fd7b504bbfabb396381b7b06c8451a5adc167fa7639a33f29ec72d4a0 +size 867797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e228f19f60 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ee70b3f5abccb0f00c164a72cc8bd724551f1bc38a36abde6cd479d63acc05 +size 824771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..00c4a68bef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c289521fbf54610de8a5e147f97b1847b87b3be48cf9894f4a311663c9987fa3 +size 848359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2d3e143d3e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d96ae0d40a8478c3ff5d1f7231ab32f6b3d5dc4bd0b657f021d61c4a8589db +size 803853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..70ae1766d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92250f987f44920bf487692a8048fd50510ead143ce245a812fb21ad5ec67dc6 +size 854159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d4eaf15f64 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9682d86cfe139cc43535c7c4c2cceac595f401d1723a814249da7c9761962b08 +size 776605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f763d4c8bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673278f7f0a5f0af3a9c46845ab800e7778fa457dad4eff00e2ca77b5d509686 +size 849023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4ae1dfa27f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4301a5131a519d466ac60ba5694650805060cdc4feadff2eef2e54a5728387a5 +size 795495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..46ab740a62 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2a29a9b4c1f544eb093b9fd1b5de31258e71e92a86c7978967642e5f9d0d6b +size 936369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..32c9a896d0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9e6d15fb6e4bc08346a0df4647399b28f8c0a45ec34c4faea52fbeca7cfd32 +size 829405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cf5ae39071 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1490a0a8584c9ce63fc613aca3814db3adadf74955681cc2924827f15346725 +size 866029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..407f7ca4c9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49a91a5d09d3ee421bfe78fff06ee1abb379f564bf5f684b2259e99bd710393 +size 747917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8efe099ff1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e59f3aa20ad9181fc303d4924ce3951de5e6b9cc8e76dae2cee778f4195a611 +size 804311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d15e11c980 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1e8c069d6c32cd040ceabb40279981eda5c299de6e65bb6a542c501b7e934d6 +size 682697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8e74f1f10d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e4d1cd270d645fd229229e5d1050049b3e23e607c714a7fc6d95876aafc9ae +size 832453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5e6d9ba58a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f1c7030b1742039a12fde5bbe978f43a83472579cdfe42722e8383cdac625c +size 755689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..855e4fb8c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf6efd3266379c6466c1f1e49ac9b55f519def3bc0a08785536bc3d173907fd +size 827317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..92b3472606 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae5bb5be4ed9300f4db0e37ae955d894aae24f30c4c8e2cf7abe6516ecbeccc +size 773739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a1fc0533e7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46712a89d0390ed4870ac402aaaffae2aafdcd50f1f97f2626fd5028f967b6e +size 917721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5087d1e74e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9db62edc3d0dc3a7612b11caa02fc5cbaafab506554c1aef6aa3456b78e816 +size 809277 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2c18f8b7d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a617424798ea3bd8e1ffb0c8b998d74b51bcaf9080b2efa1db0d183d8b488b +size 846543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a56e42a18e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb5879d7ceb78a0f29d9dd3cfc701039b834e9e2d984197112d9a9c691365e50 +size 726211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..35a8f6ca80 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c007af271688a1350dcc809e8d73b7ddbbeb50d9aaf3bec87d6f4e4a087cd +size 784825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d8bfd5a94b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f15146583bfa40d4d75b44ab02d3666bd520e9bcb0c558d78ed45f1f2dd6c9 +size 661729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..86c94109e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e0a3e6495fdb27b692b0c5f2dcbb75958e09a2818abe50a71504ce15fc9a0b9 +size 858387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..53027506ca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773d58515232b5bffbf184107483d79a9ae363f4d71f1c773a73b974bb9b784a +size 795583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b7c1f14e0c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900f696751fb2d68406b3dc5f229d90c139d8f5ab900793a1ed804f9d15c7824 +size 853251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..87a094d16d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e2fe7ee05cdf17629db96b31cba71915fefc59fb921a7ccaa0e69146c6ab10 +size 798193 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f1b00452d0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8a48ca008729636dba058dc468c69ade80f521c6a3d9b1db7d6bed562d4c2e +size 951843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8c87b45d1a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69143f0625d7663f86fe2a51d7cdbe647c837c60c0a72787d7378979950db9b +size 892783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c3aff96be6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1833506ba1c34af177dcc93f56ef41abf1e2c4f618a94a219ca25f875cad3bb +size 840853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..743151a4e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a939e68ecee194c7d5077c3ecf0a04b07eb939d0d88e0475935897381001bb6d +size 742623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fc93158ef5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0b70f796c28f687a774446dcb00c842d46253296cafb521a846cbc7aa8e8b0 +size 777459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fbc1440745 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527d54e87b142a744eb72dd863ce89059089339aa2aac50b626a8ce3ae4511af +size 686085 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1f4822a765 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47678148771c9256e3fb296a9f02cec605ff0888bd48058076fb3874d359b893 +size 836631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bd815c2273 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab26786b14525c96ee23d431f71a14d8f8a4a2cec9b6cc6a38683931d71bbeb +size 773827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..76725f99bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ff4c210df647e9c1aa216e780e68fc46d1840a1cae7ce5d5d6a0ce8b795d99 +size 831495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6e2161bb7d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f049c0185335fee7ec43e61fd5b2335b9162ee70718cadcef8857c050911d6d9 +size 776437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c44a37a9b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3014eec9ccfa44c7d953b3e71749298d69f39863c56d5e65d89272c85eb9e50 +size 933147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3172e70504 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94f49f69d234b267c5b951d7e6a03589b6b4e225cd2c8751318fcca8a65da77 +size 871867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c3feeb5864 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96dc92b712dfbcbe8aded4cfbea554da76cd259fb8d1ec763fbd2bb3f535e49a +size 821417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c557006029 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed226cec425c9c622e2be9e5e7a0360741f07e661088cab0b90cefd9ba9abf4 +size 721903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..aa133823c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6acc4dd83f204ce1c79e12264b2a83eb266f23a52af74cd343dcfc6e295e5e +size 758021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a563ea6b3b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612f1f5bee2eca0a18fd72fb00b50ec8cc83c7e58600a6110ffdc4ec692b07ae +size 665167 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ac039825a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514cbc5539e384d5795187229a9a33c5c8dfa822d65bfb98465bcf780adec5f9 +size 803807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f9cb727ae4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256929633ee01b7d4bffdd2de70b2d19f209bcd491dacda730a9c2b06aba5367 +size 700693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..029c00a2c5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3b979af675dc8301707cbe91eddc932e04cfc0176377e095d259280b6ec781 +size 802917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4c4828d4b6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba9027d411b10b341c4ae39906c77af8eb43dc94b2586fd52e349f529f7314b +size 700937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..29c271a7d0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbfea838411b911f3909eea8ed48bf1b69575e6663f1e7b5d7d8f839ff4cf9f4 +size 870983 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1e0fce8b79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0311c0f3040f835fb0f7299e2e98fe3263d04a70b309719c1ae60c1a5394c7 +size 766387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d3a79bf5ca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fe0680b220049a8c14e9bd8d5e54dd54d038e62d57c475de4173560b57ca93 +size 815403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5c9e555601 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb9a526735349e5810ee698467a46f57d5b6a0747c42619f18f38aab3b70d01 +size 774497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7bc96f6234 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827bc41b40744b03e87e3ffbefcf6d3b2a1c625f9b8547a76a110ff09a7633cb +size 805685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e3a6c53433 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb1dc42a1c80655c234a33e93e8dead0b1dd3da023fe6c0983b73d2e3107a18 +size 764483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..13a2450b51 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cde4e16df201ea40b99fab236a96d9b914caa6429b107693b79b6aac819b03 +size 907343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ce4120308d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ca10786e1bfe5fb9c912f53517934c63cae76b3ae87449390238e9da461cac +size 898259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e791377479 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d7fd9919585d6043964fa612fee3108e297d38c34b2155f390c9d817089aa8 +size 867017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..271be5dda5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05d0997e756300f51f7f85ee2d273e6be7c7122a6005bec53820301ff1fc05a +size 757045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4c8a48503e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99468d4ae9a161b9f0f1d3ff09f225d146444ef43d29153b4830f37fb411332c +size 897279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6c09d94464 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3748c57cc8bbabfab0fda93a1285d884a63b9df588604dd00ebea04d47c6a3 +size 888147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f4a99a172d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8303417731f7b2b79ceee6524ae9a2abfb98452101d3baae91fe90795f3fa51 +size 857299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1dce29b1a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e6d0f3a7497a196a45fe9fad896bf353eede07028e8e3a3da0933b25735fdd +size 746981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1697de9291 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26292ea420d5fe33eff250d3527e273632b55098d918968c807f1aab91cbb35f +size 697425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..731367356d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a59bcd6b5d148cc5517d4e1370ac4c0dc5e5322629ff636cbff47f13b79bea +size 656427 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..933edc6454 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7ed65b3eaaf6e1da5c7c52158837d4ce0010a7f7231c4be4395d260e23a5aa +size 714243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7823248e71 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6290dbce181a9f9d85c371283d43c808ebf34bcf0993d742ff5369d5b95745c2 +size 673343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5ca36f6133 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9b6b766bf9b435f232a710474aaef6943908769b02e25d57b178645e8eb286 +size 883383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b1557cbc02 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a17e9cf2c602ca4760feaf2e92033911b9b7ff3e001419bfd2ab62ac62872e6 +size 774989 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..18d67db788 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bf5e32088ad9e5f98f1092626c2fc4c0d4a517feef7ea1ea22c98801c349ab +size 735639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..20cb3066ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec9ac141dee7bd87611539c11269e40bb06a00a4631ef1d27a4afbee9e5e7b1 +size 616884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0099ddbfb0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8a5e6fbcc0ccba831392e55b1fde7107a35dfc04ec1c9f676c8960ae403d2a +size 687241 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b2acc06e6e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e210c9b9b0528cfd29182ef2b0c25f2cbd99ade5049ca06bb7558f0df3714fe2 +size 572828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..458821468f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e9335140e165d95f808b356af25e8ceeebcde7428f62d7c4a0ef4aa94e75bf +size 686573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..562174d889 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1701fc2cf96baed61d508e12dd440e242764811f6880dc3ff3907268baae27 +size 645575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..73a0b0afa2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0871f208e7da18fd73b6cf55e6cc4ffc7a6634921418a794527e5aba0f6143e1 +size 703339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f656d136f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d612848fc034cc8b64b82b7ef2c064146901b213bf58b6a8e6861162013a06eb +size 662441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1ae06e6936 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab30efc7b754697bc645eaad15106d7bef0946f0313f129355f3b8546a3649d3 +size 873663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ee249794cd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f127ccf20e5f79dd604bb7c411f3239ef12a3c2b8a0407d0c4eab16341db0808 +size 764875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..84643f09d2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe83cd294ca1376eb2bcefaaec417c85b2a66b2bc317d995a30be858e72a5c46 +size 725921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed0f2ffbbc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da90838a519e53bd78cddc559e76e838158bccd65826a29c8fb198b6361fb1f +size 606032 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bef3432acd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e6398cec5b1b3c8291d074ea9c7b987bb4f2ac83eb8f75ef53aefb72e32317 +size 677523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f24b006a81 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0f9946a7d836fda5f70d4213116f75004bd98fc13bb4a18cb87e9d50a2f753 +size 562764 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4201fa38ac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83ce6da9256b5ae59edd9cc52e95a110e72337c2d9dee3c7dad35c68ab73356 +size 712407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7a6b67a863 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e2eeac439e440554189f3216201ce1d36ec5a0f0cacfac4d4d4daaefb6b272 +size 669929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1c96d3728a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e365372d3e86c6769266076c4bd81e6101eb1187819d7dd3a9b2b1c9c8ac0e +size 717631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..701bdcbd24 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7de86452b8debdb1b2d02fae6dc0e246e14e185df45fb108ddd40744df8ce69 +size 675203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4428902218 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eff16bc5a0cba4160a68e8e91d7efc17e5154372bd4aab3921dcac1fd72e6d8 +size 900831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d65c1cf767 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f1fa942d66faa894aecb2d1a8c15386a8d16e0b32f16d7154a8297846724fb +size 858445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2f9b525e4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e758cdb29fe6da2bc00c0cd68e66b8a0239d59366f3fd029ffec5c7d2b1e93 +size 709675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..31d21651e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f269404d5a55a0905d020c225300ced2c7b38d365d9aa7582a90f9fbdd2574 +size 619683 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3852fec7bd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e62524e03b1a46f9f0d073e6ed8e3a9787c4436b67cceda2c793f5e410b773 +size 660389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..88245f9d07 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8526f0db680edf7e95f39458f07b5e4a80a03262662bc4e76a3ee441c968841b +size 573800 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9a219a7319 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919874c8c235f830965e22bfb803b83673e65623f071552231ca8d2dc1cd57e5 +size 702343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..97440c8ad8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3bc785f7778fd3fbedceefe2cf462f73b9eadac6fbb1279c899171e0d3a49c +size 659077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..32c4887831 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87cdd0c21c3e2a0c7013a1f0e88a11bf0e68e0e85da52b0c3f1a3af5b3f8d2ff +size 707567 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f150689cee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b6495e1b7520c8e3abbaecf468261c574b158c74e7d2b99c0ba1f6fc28339d +size 665139 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8a25ead324 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5aefc055f84060a6a406edf726c89525f40107de830e2d5982717b9bc3e527a +size 891113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ae02aea40f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e9b5edf1e8f8161d58711096bb5186ffdbc0779ac21c6017380d5a27c68b39 +size 848381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d4311ab5ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b2fa440bad00844d54594d8137f61acd2e5fd4682944239c9641096c2a8b1f +size 699955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..568c7eb7d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea7cb78b6ec61e7e86be7d604a90387a03bc11a160e012fcb285913cdb81d17 +size 608828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a577a72f77 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4df7b03575a5f5545130bd73faca9bec8d246bf3824b4feb64f8494cf0754a +size 650669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..351c609eba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608099714e8b2e1af1f6a4705743a64842f62a0ebb6f7e7c8968f4681faa414b +size 563736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b02ab02318 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1975a7a4f752993e3fcd83f8f39297c7b15eac9895a876f8cdd78166c5dc3 +size 855969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2f8c9d48b8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb30685ce763830efca4db2b6e63bca57427de5141599ca5277e31736f76f19 +size 763017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..84911ef7a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60950e63e8ffb4b614aa5f48887bf279b804b32d1bcccf80ce29539efa282d0 +size 858237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..89e2c3c547 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59f23efc819de95125401be8c6c2745fb5a288fad47ad0409f0f92967e64e44 +size 774411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5a6d4a4d41 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c231c467cf244560bbe574647d8b23690137052bd99e250ea4f5b84477529455 +size 905305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3a353b2785 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ed803b58cdde8fc1401caa87e371cfa2e52b87530b5011dfd53f60eda00cfa +size 857591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..633026553c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e227d9a7af99c5e7a1307b2685ab2e22c3edc8db542130ac206ad7b8bcfa509 +size 891787 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a2a58c18a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6400b802d73aa2740eb5005b44e8e15fcf814c340b77064b7d3b99d06d4dae98 +size 842693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..57979d1438 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8833bb0273a5465118f215f4a79da901d5719f4c9f1edd502561b1b587c2905d +size 975405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4f839fde6e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0790d68824deaf1dc342615b5c44f87ea95563fcf7fdedd6a2124975a0678584 +size 872043 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3e5e302357 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3992218c114388effad733cad69725733990d942b5bfe6152decc84d08cdc6d2 +size 962677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..05255f75c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb1dba2939e30712b21cd568ff42f4afe145920baa6dc71ef2ed4e718cc4800 +size 854827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..453e0daf2a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830df6e668da66a3e77c1a551df3c60143ef7953a06956befb0b7bda6133f11c +size 856763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c3fa332557 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756d89af0039fb2e7a26071f75315f129d5c9dc19cda133f9cd8c9ab6a5fc844 +size 763021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6ce92be3cc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb138068a50f135a52cf1747a21849a158d0c665fdb85f706c827915d9b0912 +size 859029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5b92220ec2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1b1f2d442905460239c72379d89cdc3fcb1820f4dbc1c809998c197d50a743 +size 775205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..82a01e8455 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db489f83661132a34675fbef4e457b595c5ca434e7b305198d5888a515489a3f +size 931353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22d74f778b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7ee54538ef35267bb25419b9720c7a85ff918e6cc5c5309551a3d6a6de18b0 +size 889121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..da906dc29c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbaa47090399de797a47658c75e6eee561b0b001fe91630fcde4d3c3c0aba82c +size 846007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0a87c2667a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68245dac994e518d8b4a45690e471dd64bf8a5973bee896b84d5cdb1d9f5e481 +size 864899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b5803100e1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254faba17635b694b0368613bd130e070e87b83aaea9f727b6a8a2ca5d30eba7 +size 827305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..06ca33612c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8cb8a1a1e8d4e0bbb2e5cafb380bb81af2c9f6bccfb2d920d43161ed85673e +size 938697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cc3689c5f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cb1353c45c782a325a97186ee4fe5b2e298bffa10c36cfa477ddb92736b07c +size 895923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c4a4f1631f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a830ec1b63f615006fecb399ab6546744dfa0b2c1314b1288463814551d65c6 +size 804511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..682f06a055 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa04a1190fdeaa39fca62df4c5dfa44dad0c5cf07de45b47b466f5bb36b95b7 +size 878263 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b8ee6e5d48 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72167fc32b9e8e8968972c24cab20dcc3bff390f68a66a23989f162df594c46 +size 839879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d4f71062b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41401f045e41d1f2829eb1e471387c2e3ae009379e2fe826979b7e72cc60592 +size 941249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..833dc145b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f618f1529dc49ee2652c2fb372b33944c4df3cadd9baac6dd8718c0c2d03b419 +size 833497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..da3b1b4c0b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a294034d4f1de9213967cd0792df905e0c1ffe66822908b9b39301bb94eb96df +size 902669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..535def1b8b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624dc191a7467e7700a1a2aecac45c9db3dbd130c599713890fe7fe13e1e01cd +size 790575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bdae176e1a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d906449cefa99d74df215757bea8cdb5d89d13c0f3cfac756238121b13b7ee +size 800501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d68ce2987c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce2563e8ae5f12681c68c16d8ed3f9817d370a5b32f8d60a25e032e6c21a2b6 +size 697929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..da3a556bc4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e39f769ce18fc4091eeb23a78bae65d5b1bbc31914bbbda6baff8c67d14af5d +size 885253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c5dfe2f208 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1da7027acdf63d788130f37d18baf1da38cc461b30a14f0e11ec70c619a611 +size 777305 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..59e6d6c1ac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfbadd3414ee50827effe770bc66af17509363c8bfbcc597748ff2a166cd8d8 +size 851163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f29e5ab1ab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:403762faa3cfebab448f49dc554d18bc2b6532b12cd698eba204ce2528b6cba0 +size 738131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..73a43e6181 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219fb4c1658b5296449a103e8e010805f7f2d156527afa4c9535e22bd0714661 +size 899631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..91d300d5f4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7674a69cd9deb119baa1f11e594f4f1a869ef8bf679c4550a720187142a89d11 +size 870967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7faf9c2dfb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5db60a9cd96b1460108f4988df41fcdd161b5b3b779df1f442b08dd360c73e0 +size 813447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3c72023ccc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e190c6f0d9f0ba08e88a006672a86bf3322709fa97e54c865d6a4141b1b516 +size 833177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4785c961cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832254280524ef5315a453b5025e1ef6f414b9fc3012329068b01ca0aeb07086 +size 808311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1940454dc7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf45f1d547bf2a9c1d286501b0d1dfa45c8f79a0005f17696b9facdfdcb1b67 +size 906977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22ab98f5ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de782604bd52ab6dbd44f474a7a3f87d46877ac1d3c64a7f68376a5bb2e12e8 +size 877719 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b874c9bde1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116573470123f4ae8de89122a959b13667b6847da1754f6577c448e3e3728123 +size 771951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..291ac5ffec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82d10249b41387d3b62ae7b7ba71b70fe3d81b541b06ef17fbd16e160c60c41 +size 845751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5a85310cbc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6b31595418051d7b7ac1714bd59b2258f806f9ef4ca184118b5ae70cc0f72b +size 821675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d578ed9b65 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d53f10478eddf7e6648c043df7111e6180fed9242702945e91a917429f81403 +size 911255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4c085fdff9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e2cb1ae7bd68ccca54f9306f357f3d757d444ebb4382e0d91e76ebf3f108fb +size 800987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d8a21cd2dc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb0108867eb3c8297d14bce6fdccc6638a9286e9bfbcac9cf0593f9d216a756 +size 886783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4511668eee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b106526161377bc198608be78bc6fc07b786937532aed9e4325dccb51e56a2 +size 773209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5ddaf62577 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad759921452e6621b399cf6f91f629b956abd119faf32dfcff0cd262bb3edc84 +size 769717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3fa8f6c745 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4addb71fe9591bb1cd7dad97031f1e3cc6c5decf80be99d638d989e16af8d89b +size 665369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eb68cc2e7c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213abe180b7354aee6711665a6a3c9680ef8d91b562298727b67adc06cd201aa +size 855259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..682b49517d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d239589a949d7eb95cdc672c52ed5c24925ab4b47e278d8fa4454c164e8676f4 +size 744793 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0238f3f73c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f44cfbc5f4019f186b9cbae46cf1b2b6f3b35a27dd5adf78be8b5b7f3f512d +size 834537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2a223f5d1c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4917a3440fcf373ab50d048213d38933d533f5989a321400c0ecdfa9baedc869 +size 720767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a5d66772a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c903da1c3fc8e6a79b4f7964627c2b3c8ee4bdf7a32da7d30f028334a083315 +size 996423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..94504cdec5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc622299118b2f0f9e8ed3878ca3ca66ca269e2df3b75062754cb93394f2fd3b +size 940625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..08593465a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc61d0003d66dee2ebd8473e00e57b279e1a18465e0aef3ff3dcccbfdbb2743 +size 1078423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d30ffe0a09 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b86f029c9cc1ef290fd40fad73376fcd76554d1745522baae5f0ec1913867e +size 922523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7c3592ca2a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9464e39921248648c10c230e307d93862e3e5e613a8c4a5a2e1fe7bea69adc33 +size 934459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a6dd84c6ec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af4bee192f0b76b04f3a10f23d833a5340f2780b6a9c46dfeba1164aad101189 +size 883841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eb417930b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e4728990dcc3f0295ecf09ef3d4bd3e481ec08a87f73ac9123064f15f4ce30 +size 999033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c6cf5cac9e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e653a5ac6712a0c879b4db87248f4ec42b96e434a0446091f13fef7b50b23b8 +size 944023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0b54737e14 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da6634e8473e63fe860866c812900391a80d8bc8136a37516aa3afba19a3297 +size 986609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..80b4362e80 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100d3822e750abbce920e5744fc1f380ece3577ccdbd7eff754e044ac5eec82a +size 860257 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..93fa257c12 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca9bc235c8b5cbeb3f87434bbe0c4b75b3d9c6dfba34388b4f8fd658d176526 +size 943185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1968bb05a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5968a34669d129072165de3b219e16e0885ca0ba6e5b707bf2e8b13b2c6591c +size 891827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4f14025775 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393d4dec1e87800596647a70a6ee9b5cd1df01ea3f5c8f62f07c8210b43b98c3 +size 1002373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0f6396dacd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ad58042ec5bb67313129c902303e7ccb1917808e4b89d95a7b490e77a14ae2 +size 893141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b51319bee8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e621dab426cb13314d1b043f019dfaadbfb18c7b6981704bb2a6066904759d +size 949387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..567b566740 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9b1332cc738763742e5bfb6f1181dbcb32bc0bc2abfe2ef7a66470fa2e2f10 +size 837245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..35afc0edc9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8405a6994f51eb695886565645f082720be229f57910fe9ed4fe38242d06d0e +size 993797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..36d3ba6744 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f2cc35d98e7d37a81e473a463ad5c28a1d1284cc4bb7b4d1cc05f9abe32c68 +size 866163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..635f40bf46 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c5c7d6615a3f088d29030530bfbb1968376391e5ea0c5dcc17f167d4c936fd +size 880175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7086257f65 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aa856bf8e497535bb0eb1b6e57770a2e71f9279a6bae88dff39dd4ee5164f29 +size 753725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e734d1f437 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8999411c5b04c696ab850e4ebd3281ceb94d8813e9341b38583babaefb0f5d28 +size 941001 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1a82dcc43b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6cc318b83e9f490e105d50e3a9eda13e378a505853776b88e95794461a74d9 +size 833643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c56aa3b0b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb386d12ea27116c03777051be9513ba5ffef9b861a7739cb79049c017a8412 +size 892503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..dcac416211 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f853fd9e9b247d1b71533a350b15317d95a29cbf1000c29eaa8917d60dc98426 +size 782235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2fc82756f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66318853041df99697c29b53fde2b64726e33eeda323e6b95891d85b2034b533 +size 958831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f937ea8941 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2354e40f26c4d9f6ab3ee7e8917eb8e5e5b2ae04bb7966d94a7b1d8df32206ca +size 919511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5cfa3e42e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a8ec77cc722cb5b5ea848be18f35687285d3b2896c53c9eccfabe90caa7c14 +size 1054743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e84faa3d66 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b44afe500b1f9a5e1bd014e12ce08ff60033b71af5710c97d076c08cac3d1c5 +size 884141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c73d00fef0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcca6524926703eee24faa0bb6a16c9bdc62d70954c91f2841ca1860ff95ffda +size 896867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7c9aafe6ec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8586a1dc34e6825634a30a87d4d82fa4f8dd10140a057482a2b896cb1f359c16 +size 862727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d2c75065e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a55e325473715de5b6efbed27d46f0645c2a16531b9beb74b6eea5f2868def +size 961441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d36697284 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ee38d92a40c882f2d47ae1d5fb48c3afa82462428758dd83c6009ebfacc7c8 +size 922909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..81553cb981 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1b71db782ea3e4dced1f58c633c23c8b5c9468e3c2933e5f847e6f8b5586aa3 +size 962929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0e02716296 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d42d55755df69ea004db7ba06f65c22bd20941ba95881010ad75bf60738425a +size 822665 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..55965f88d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b95a058d49f789f234b8dd9874d66012df93e0d85baab8ada6339e3669c903 +size 904803 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a502898d66 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b02205b122b037f2258f49905a150649773d4fb41f68594632b68b3567c87f +size 870663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3414db746e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44aceeafb83c712ddc295592c80a675d6414bac28962d97ad80191626f776b26 +size 965915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3d4dff491f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992ccb3237276e174af7e008221a9ca43619b9788d094bbe7025e25551f1ee76 +size 854759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4a7525b3ee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b43e5ce1fafe55191c35a9c7a34fd81ab063e3338202eb112e8197d87a6613 +size 932367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..aa2d02d7ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973492f0a988c9a67269ddcc1e74ac49a4176923de7410fdfde96ace883d31a8 +size 816919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..94d73d63e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015cc0d9b5b30a86c78cf6657dadad6a06f670da3777b8bce50bf3b84a8f9820 +size 967157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bc34c54272 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886626086847dda046e952a696169ba7cbaf63e8944d46f1bfd8d919895068f9 +size 830495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..410cc6ac06 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c7c95d02b8a87599b47647a26a0660f934e310e782726df4bb6ac377389633 +size 855655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3e0d695980 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d3f7fbb1aea87e46b44245831d73c3f2d6bcbc285970d8b6cece6cde1cffd0 +size 716183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4a19b19565 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5586645119fa16fd03c4c20beecac6b49d29d6fa3704015aa48bf1cdb8ba61 +size 905333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4c0bb0cc59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01d024479e94c9e63c9809a21656131a655db048d905c073181b260a61f8217 +size 795261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f6f14ca28a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3fc547271946a9c537bd761f34541dee54af69510a40327ee005032e47308f +size 875533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f30a15fffd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e41d3a55ac1969e1428ac01d47e99e3bd6790f8e982bf93c3bb67121ae8ec15 +size 761959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..847de3a806 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad2a1bfc446a3f65de7ef8ce33361a05f689dc2f05ef34a1e02138a9846b20c2 +size 1124395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0dca780cf7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c2d8fb0367b04a493e4b9f9b59164ed5b8d1c219a5df749613632740b49506 +size 1044965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d24a507a89 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a78ca8489e360458a0b2c6762b9a2feb7ebfdf181b5fdc5e855e5b79821d0e2 +size 1077971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b5ff276afc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64506ac02e8da3c690ab4fdf2236a562e1b73daf691b4220183329d0db5a86dd +size 1049455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9b143fcc3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69174982ac1c3ec53feec205319dc9d9027f0a327ca5a30a19fcd9f04c850771 +size 973627 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8bbf028682 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f481d6ab46471bb4b9d3766948befa7c97bd781acbafa63b8d2e78aa14f869 +size 1112105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c2456cd38d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c152131aaefea648f76084701d7fe0a6fc4156c2ed5224a7acabcea15089e4 +size 1031887 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c1c3a0d524 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaba36b5895481c117d7e0fa61f39c58f9d0a24bac9b73215e0643cb391cc118 +size 963809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b049a18312 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710cfa46f038f0ce00217cdee6df7191b2d08ee89fe39eab0a64e05423e020a1 +size 1046243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22a71860d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65acf97b0be176ca1b8c44cb20c4cf797cd2b28e1937dbd8c757d49e27dcc36 +size 970415 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eefa8d915c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3c1c6f957ba346a700e33a33597870700dcadcfa8212f158b871050a924f63 +size 1122303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..24ccc4d4bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9459cd351abb3b9a9caf1d692788064135089986a4ed2228004b71e77c82e2cd +size 1014551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22046650a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0788fc2c8d44e2b1f25744aeddd992c1097f7f308e6567cd7e7a8a57e1585415 +size 1046081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ab17290aab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d13c077cbb04bdf220adb009076280bb4c7ba7eef9a015cae45e9b478970fd +size 934185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4d449b87ac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a278137cae29b1dba84e7728e3e88f72e9934a9cf5f2c67a8277284ad9af19 +size 978447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e491498660 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f63ac44aed69203349712950404a06295735d9010d7da0bb6bc287e7de077cc +size 868227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ff27876149 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d718e927fb92a1b908a20d4c6aeb240120aa6ce3f798b4ff958c07c28ff4fbe +size 1051705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..77faf0c8fc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b5d6c433e1507bd52059a4268beeedd983f456c3612eb87bfd3838842e5e7b +size 945285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..adccf721b0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c040dbb59de5c411891de49fa2514cf637abea6cd04f0c2f808e4987288616 +size 979971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eb37252622 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e34a81af2acb90e248b9d70ddd34c5ae85abd52958c30bd2126b5846d8d7084 +size 870247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0f11a8bda5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a39a4aa88602860c6428f719d993d3f38c7cca90d5ee35471f2ed56674417cb +size 1075899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cedc99e691 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6974a044d8c8bea5200698792405e3d26e645eeab9944a80865d8cac07d0764 +size 1017981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..85c8c646bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178ba98955249d0a41c470e82fb6d45ae39df22014440235d8f0a472ea1cb758 +size 1028687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8d699a4be0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d49547f003ac6ee2fce78523426b88ed00932e52805f884cec57a8e140e3d0 +size 1000171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c520cfc384 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee0caa9e2f3ca91b4117af9d83eb1cb35023db3ea9320666850cfd09615a5bc +size 947431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..66aeb68480 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d0a9caeff6049cf68402889ff3f54b18edb04e9c719018ed9a75edee7cc5ed +size 1062771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7305cff94b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c975e7e9c12b389e658dc55c8864b5691d3bc3338e382fd4cde42b21271f5d79 +size 1005691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fbf581ea5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4836dd75223663e12fc9aa3e578b450ac0473c2ff2f9938fd92e8247f1891972 +size 915313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d1fa96680 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac3e874c4575b59179819e8f648b40b06f3da4221760a8556dffd833bd5bf03 +size 996909 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..faa227abac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c024215d8553f8a766d7a65d1821aa620057cfc79a8fadbb15e65f0a1b8efad2 +size 943429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..349e8780b0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd72bc3f5fbcfc3fb5a4e3c0a7e5528c56518a95cc942117baf8f84d5172a12 +size 1075337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e365f2056e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360f70066dd70fd83a73847c57a6ac8777af97a84f89080f624717d7ba23ec8e +size 965217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..87ea603edd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d63581eac4de8268adae9745d336141348e35fe7f755c8810b76cd0a2932233 +size 1022745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c272fad9d9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6d19f4703669a6882865e805a4c1bf8e24307f1888309c4f908cb37b1496e4 +size 908037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b2ea57a86d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ee82ce64a1755dfcefb379015724833a57258956e4a98a15370121a6fe0afd +size 932171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ca269b3790 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac71946893ed5d207dac3af3cae3c20a22b75e11eba700f5c533f8a4ae6e895f +size 818943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d97ebb499 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae3ca5b42fa65607b776f5add0e51124a249807689d596d5fb2e5fdd4a2ac72 +size 1005529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7b25fe57b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e8120783ad308a5ab8f5e0259251d25a56f4ba844fdd4ba46bddb306a6dfd6 +size 896741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..977d22abe7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559981dfce4575a7023f9b46a9d4e1710de60a3a5969533d1fbbe25ced7004b8 +size 956637 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..815943e46e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4dd29370c5c2227c253483c0a816424cabccb856fa702d20413b25168bca86 +size 844051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..cd125d9a25 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4878703e5d8d8af02a06d4b3f4b7a1ff2426e4f6218bcfcb228e6c8da756d988 +size 931181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6077feb02b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02afa5adcef1ba451df32d799cf0973bb4e673b618dd5110695f67c55622d296 +size 830335 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..03e0925bd9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb48391f1ba2d8471961f5ca729884f8031748ee333a81122fece85d5090cf07 +size 933843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1fcfd157be --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981fbfee0f6d28ee473f9fac0d11b38c574884ecddd3f6cdaa3e65b859f3b43a +size 830383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d74af8b9ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c443c883f34f922ca1419c6fda3e54e57df8acc61cea5274e9e2f6b7bd0bfac +size 998405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7f0fa0f9e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad98fd87a334250f4301ddb2c43ee4a7530a59f0eb9308632e05013f6c84b35 +size 896029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..96094796f9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc105618f706a6e64d6ed02f288f65ca41c2402a5af439e4dcf3fb71037413a1 +size 942183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..55dc87cd51 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ebb795cb874b4bd289f2b633ea50f9fb6ef3260c5706f7c3b5309d95a43f4a +size 904337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ba98b4d9a0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0481c0adfc8deea5ec1c1b079ef53c4be06cc81f5771a14dd19132a7fc1a9c7 +size 932465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..05897ceff2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add19a7404b1255dc72dedb6e71f6a1717752227d616913079fda1571c4527e1 +size 894323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..af33f06323 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573144a5fb3d86e6ced0adfb4e605a567eeaeada9d328563746fda16dd8722a0 +size 753363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6591b9b9e9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd67cb6081e76bdde2c4ec3b5615982e1b70eed50e366ee713d6a0774a649c2 +size 674329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..47952ca16a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf406637e54c155518ef516c43f47f19068ff5f23aa350cfdc0f06e72c18f66 +size 753209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..87762518ff --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a757f563a8b6fdff05efa6a3bde5d4bfd1f18b09fa6e71cacb2f4ea30c48994 +size 692379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..adb64c0011 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52392d62113feb4eb94402d3dcc3117e06174bbbfdda0402a3b7d67015b4f7e2 +size 1015145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fba6d40b43 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947db5879b4ee70ff63f4e4ceba67df6dbabc2231bce29242decda25593e2271 +size 904729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6369b47730 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b9d58b50f4e75a7ba5b33787f03afd630ef4e1f5e2893a465516dba7459d65d +size 767897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0870bd8f1b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9c2c62f20ca70f9ac834afe3cef31fb698063c67c62ebfb95ebb9e1cce1bcf +size 641103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0ff344c089 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddb4469bc2d6a34062588da720bef49a965ca2c4b1eb117d5257108fcb9544b +size 702381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..208206a879 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32df44b71aae1d790d045c786a0d605d83c7853efb717348eeec3c768811fe1 +size 583132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..335a6d1ff3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ce9947e3925749f64a2ccbbc386beaf3c54ad765c811a81cfcd27bf8ac780a +size 742461 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f531566b7d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8491d0439deeabb7f7abcde7068fbe304457d3bc58ea4506918d15b336292c7a +size 664265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..246030f95e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27ea1e6be826e6282ecd7887bc86f7afa878344e5de58ed75302de401446df2 +size 742357 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..03649e10ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70732c7ba49dd78c4674147a912ce48780caf85c50863809d3cc01ecab47a6e +size 682217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..851666526b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdebd1ba52e4c4bef0fa330a8dea0e7088e3e32852eb23399bc153d0671dc8a +size 1005377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0d80af2db9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd139aeec573a9cf7efc193a86e9d6cf0208deda692d48896b108354936f9f1c +size 894617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..89d26526e1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223bd744d7f133288c6509385c7fbd004fce059c9327464307dd949d58da821e +size 758129 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..485b127c29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e36e45fadd06bc8e521ac7c25a690d6e0a20a5aa2d080b99581e5ac4a347c3 +size 630989 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4f6ca01020 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd3565804f72dcd2c35c4c4ad7fba78e1c1a80e37a8be607abfc56286795906 +size 692661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b9e559112b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857150863d0a25d7652f45a8a0f37f0c75d254c7b60968c4288fe4c82bfa099a +size 573068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..21455e43a1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be0202e111cb53594178a2df0e34cdfd53650e7f6d5c1bf5c9609eae5401ae4 +size 756753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f01a443dd3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e06a2d07d08526fb620827ab800990d451ca210646f906b9099fae6b7797c3f +size 687289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..05ad537cac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f516610d1b78a016c253e40fe5e05ca2a492488af71c4e286ca3a2c4955b586 +size 756599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8641f5144e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a735ab66bf3a1f9669e00d537402039dfe0203315be30def4f890f9f1b46c67c +size 694289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4ab25780df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c3f967f5f45f268eaaa6e6af6a4a26015928692155db410b8f086514436b4c +size 1027611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..81c31d8bc7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859385e4c0e30f122ea95b2122555ac983d5845c00c7c7e9e5200f4391b48dfd +size 989025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ad2cfec99d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46aba3aca946fb52015536990e803860fc8a2a9fb8aab607d174deb4c07f627 +size 741933 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e0afc4bd2d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e01885531f0c0660bbdc3002d177fb74fba0dd9b1723d1ae6318fb0063c30d +size 646317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1e348792f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e82f942a6c0e3de302290b122be4e9ad5fe839a8aafe3913545e3c388fd267e +size 674739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..922c08044a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c1e5f31d2cd58a2711f28bcf47e4832abcd3c80f76d48e2d81a344628859dd +size 585880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a261794ef1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d791714ecabef06a98e59050f132857b9ce1600903cff9e6139d1052ea93aeb3 +size 746639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7ed79317ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013bf2b95079b96a8600bf86bc99ff85b191713c7b53b0da0ac9e307a3702ecf +size 677225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bcd6e64a34 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972885c87aae71bb819705fe483c0077d4e72454664fd0f79ff46cf294ff29e3 +size 746535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..43a04fe22f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d798e6eaef968c863ef1ce7020d83b274d731ccd7d902a32478ccdad09c1015 +size 684125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bdc77a14f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043f27e8ae7f7214ee2c87456981c3b7f86209fb48f2b60b79fbadfe0203de0b +size 1017893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a34b13d8e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da408f0b397087f9ae1fb23105ad5fe1d8684eb84261d968a9721b5e97f39a4 +size 979011 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..570b541590 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63d5d3de313850284e0a7e587a1e221afffea64b316ea2dd272a5504a42f73a +size 732213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9c518d9e5b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88142155ef88a372b7b1c49d0d476780d3cc833941933c0c3f54dd77bb3e509 +size 636203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f261fd5514 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1a9b7e6f8b674bf110f09fd9ccc42a086f4617eae56c4314fd1921196f5cf33 +size 665019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e75b32cd96 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df212bdd337ea011365e714e2400f4005d2bca21e86ed8bfae701c35337494ff +size 575816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9595da1096 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ecd5603daeaa5b462d179a98355789bc807180f8d0086dc291ba54eff2ea8e +size 885399 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b0786beff9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2856c9262e1771deba5f18da8c49b3b5ceeb5e98270f013cdf643bed523e9884 +size 790721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d41d7bfaec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0231fcdb05729ec75bf99ae43461cad7c05c83a6b2041cc70d8a7109ab042eaf +size 881993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..53d524c84a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebb7d0cf5a4f331ab349c6c61efe01b392a0eebc130788d5897f6d39651ee1f +size 796343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f4138cfbef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a14000a526e06d60b003851071e6b9dbd9c28fb8c116aff6724dbf64f8e40f +size 952771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..730d6f94e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf3013e2887568dbcdbb7c295aa8db5d592a38f7e4ee795cc6b155b446895e3 +size 857451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a311a741e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065dc99228fd2179eed7908183082281cf8e868bd923264f758d6d85b84aa097 +size 926595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f0a467d5ec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:402cc0540f3db88ed5e8eefaf0403f2882c8c6cdcadc7caba36fb94a26a7f95d +size 882975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c15f73625c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c338bcc6de99ad47eecebdc460feb744c02a7d8d47be2b32041e16f1154b47bf +size 907947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..57056a6391 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa5734ac044af60fa562bcc8a60990dd7a04955973868d6b333a1e35788e083 +size 862059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..84f3f2f4b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d7ff34c6ec8297d827ec565a1c6493f4ce3d2c996a3fcb76c8648dda516787 +size 873539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8e25f6e610 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df977e3c64d6da3f8aa1fb0de5e0f83d1955ae71372326dc7a5a494d267f9948 +size 794259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..787c0b025d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadb059b94fa024e7efb9469e7c1aa7670f6e1a2a2ad3c6f42f6768e65cfa979 +size 868403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2a24646fcd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1227cc0781d2fc95551cb074413ec1d49802ef8868c18869ffdb7c6b7efc196 +size 813295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fc981b903a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb44cd7da3688720cce16bc4feb82b1d95b3f1e99abf050b7028beadc83ee13d +size 995067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1c3ff1c029 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce71634ef08094c79505c33ffdf2e3e4b34a785575ebe3fe160c88310271d6c +size 887957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c23b656388 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b73db9d4c42a9eb3f8a35d4b88a94a08694800e60e98e83f6e22bdee298e10d +size 899715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9e5ea7a330 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa08c547da2b5d216a842065ca9beea48fd99ae3343e03bc2a8b59059a997a4d +size 777657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..932df92a2e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1030d08dbb0f0b777d93bf6117130abb9119467adcd29f7655a7bf6b35325cd +size 823889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f2cee7748d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff62a716871edcd4cddc1080042469b3e2a98f1b0d7def63496ed86dc522bc4 +size 699313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7ab73cfcbb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc94fd28c7ba92154bbcde7053846c8d2863682701ef9f1f3179d9c75318d3c +size 851783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6a6059446e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e357ded3f066722d5c412ebada88d3e6531258f401e91677015991c9420a19 +size 773293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7450cb9d26 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d00b57e6539095760bc918f7a717df3afcb6973deb73cce15f933312e3cc993 +size 846697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f804f75f36 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd909ca2a0b1eb6800063f3ad287a736bcb1f452e34f5329e86c04fb4bac46c +size 792329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a7bedcc82c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0eacce8a0972ec676faf3a839a8c9582bfc13e2e2b6eb8f5b55f34d8a8a1d4 +size 976419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..fac7674287 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9a6aab4cc68b4189343dd132c6d0f583bd8a9cf926bc3e977822a0e04d21f4 +size 867779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..de943c23a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929fa6187ba92bea356ab5bc1fed8286b220a1ad9615756cd0d91857ecfb0518 +size 880229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d7c9049f47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe78bcae5bba54941a6d6c03b693008786f5ff345e2a26e973431521e6e7fa7c +size 755901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a92cc5659f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7454fea9a04e42abf22461deda9d1370f77176219d6dca26022f37a24effb63e +size 804451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7a0beccb18 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0ae53e991cf04c26fd24c9c23c43bf523b48772a2bcd2a7e1225d9a712cd42 +size 678347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..15283de586 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198ad67969433c166fcf0dc0c341f38d2b4ae83c891c4862399dad9adf917636 +size 877717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..994e0611f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22c8f4206dc70987e675abbedf4b5286b90c8194712444c7541e0464e9566bc +size 814125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c3899e079 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057fa29387ad49c8e831c9af42e4123394a5f171aeac7cac7a9317ee454d755c +size 871791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..85db0e81fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feec95a21a6986a4a129e03f68df8bf895b5c7af40e808a7fb5f5541b42d5cb5 +size 815993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..134f10db7f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6184b1e0ad47c7beb8f5d47e364a1494375f6ae3fa892757c6884fc0fe4cdb37 +size 1011431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8a523df3ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6405edadc22b2c6f5b0a2fde6b205128f60f237de5eb1889e711763f9111123 +size 950989 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c6ca1f97c1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9737dbde099d763d9d0d8a821c8b9a4906408f473a5df3d99cd939a45117c32 +size 873751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b171ed3ec8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ead0895e89638fa251b6fa7e745bb5f64c025a0fc6855bde01d944c66a92bc +size 772363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c012e877e4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbb995baee0f312a7f311f4c161ba90f181dcf950c13e0a28b7cb0e1836395e +size 796247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ea261bcf16 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860fff75d0412ba68d0d0d091da43dd5086f3610a3544f5a6b8ed7bda4245de2 +size 703787 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..175b70f4eb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c6fb156781408f4183b127e321cae8af5900a291c07d77190046849e799dff +size 855961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..00134bb641 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6171a146adb55f34b679b04367fe131bc50f2535b87f70f0328b889984632e9 +size 792369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..102a102ffa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc74bb0c9dcdb376dde22c8245b0f5cfedb72458efd3b18c178d54040b319d19 +size 850875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ac433c0a93 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746b8b887afdf0266ec9b51a34d1d5f340b68b378e9eaca99d3ef4b438783b15 +size 795027 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..768a710d95 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ee52b6a131bf8f21effbff7ce74a103151728f347c8c60cf642cf6f9ecbe06 +size 991993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5ba7de9111 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266b534212ae2f06740d1823e9ee3045130b01269e9031d53eed316660d58546 +size 930071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..32a8d1938a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057e40dab996e11dacc572418d13e7e35b0fabf924af09383cc5e2618d9be6cd +size 854313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c2c1ddc3e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16baf6ca28503491bada582dbeeb48e9493fec675f9865208386c81b2db730af +size 750607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2a3e293c2c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8ceed843edc054b2d11b2a425b693035903cfcfc79fd6895f9e9e70291d303 +size 776809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..81710c976c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a12eec9bf2eacd1693256471ada9339ac8da046eeba4dcb53a2d2055961a6cf +size 682081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..34823e3fef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef06bbeb54e6d5a1185ffc0d7e6b5f615ca365554e3287a63418be21bf729291 +size 838185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e9a3b7062d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f980c5ed06430d1b66986cc9a6d0b76cfaac7b8aaa82f3509cd089ad4f0aba79 +size 734281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e8d61feb6d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a380575ddbe4c16385f41dd0cda07a29aac7fdcc2216bc4c315c3fd7458c2ea +size 834729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..8534ff9548 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9553e90b0a3e5d4eaea9cd2b0d94f772c5360006ab54ef7d3bfa695bf3322641 +size 732947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..65b077e442 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9518c9f682f2200dd346db6bac524c6b4e0f0b089ecb9417266f4a0343db77 +size 905409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7aa554feb2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5910e0b2e2e5491b8cf2ca5290039f0d4b7c110fdb63b5031500f40ec8ebee +size 800025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8c6738f3d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5200ea017896d6ebd5a9930731b7e502de2477044a0c195a94ca0b71a3a55520 +size 848991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3f513a3d06 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc62190d4dafd3c083c102c099e6445cb43d38c499a567b7c073e8f081953ca9 +size 807297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8be0032166 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d300c858d8e2d6a521579ad509b8ac7e37a3d8dfd7fc9b9b4375dc2b2c7a76a +size 840061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4cca5ea4cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43711882a00a45b36b943e39b0ded583286039dbfee90d177137a47e963dd17 +size 797281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5c63209189 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc2293f8f1233e17d0d569e64f6e67e446117099ab29f9b3c6230d0f1aac0f5 +size 714733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..72d4ca57d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0569e2e6765bdc4de63d1962c304fffd61842d169f18a54c239a22eaa6f4ad4 +size 672995 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ab67a52854 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8ad2d5c4c5d7496fb61b53473a64acfde5514376fee063a54e3bfb0b8a77c5 +size 731895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..938ab6ffca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a80fa667142fd4289ea4b8a8077e0155b24ceee577e184cf18d33c8b8aec9b5 +size 690207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..695521438c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53c69611d5fb64072c77fe52a5873bad057c60d7663bfafaf10be2b94029168 +size 916823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4e2b903495 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0060a785e6a4763e9ff18e73e412ddbfa57de57a0a14ba32f9d741b952ff1c48 +size 808577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..03be833c01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8778d4754489ee09712118d0d0142ad85fcd07b42c1002073f4663bc0845932 +size 745597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1b36a11b44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78654777f732422ac8d932495dc86225b06f6d76975803676f091d69fe1d829 +size 625117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fe4a72ecb9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191b282a13218210329910fc7de87fef78839edee41fbd07baff9d62866636a2 +size 699369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b84e3d320 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ee286f26e65bd0fed3327cda8595326ddf2943f23c30a36dfa5bce895f86f7 +size 580910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d5de3e54a0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd64bf42233965da53ec95cf6b2e137fb23978d8be82c989272305d1262ee77 +size 704669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6cabcd46d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea4629193aabe3a850e052ec4662db7172cf79e698b0ba0f3af38c4848f19e9 +size 662883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bf5f33860c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf0f87820e5a0df8d755ff79f12ae57e4b5903c21174f22b98ca86b93f6aba0 +size 721831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..96680a8b01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7372e4ac965bffbc116a5bc1a73b70940dfd778adbdfe18f196512ba5ae600 +size 679353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..488f708480 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2a71bf6429dd0a733e833f2d515990da12365a7e1ec2389bc466afb8ac8414 +size 907055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3f3a0e17f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28fc9e472ff0414eb72295873852d0742fa18bd0f3594d840b479da5b881fdb +size 798463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2b9584b4d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd21cd794cb98ac9e1487c229a5e12d3c0e948b4841f7a429b6a17a3e75eeb0 +size 735877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..88faef55c3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a75d0e71242f8c78c42692c07cd692641567f7c7254046eb4c1ff6601b0038 +size 615052 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bc0ab88e16 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938f3c3bd1cdbda1ab83412138fc621d49602b67b84081335081aefd7a3175c9 +size 689651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4f3e0883d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9077dc69b13bd3b9ac001233c139f8f9597560df10e44dcf1e5871b4eb3b752 +size 570846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c1cc65095a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7941037e3a4a00c2a2a2b7fc840409677839b4010d4ea06fa74ac672986ce94 +size 729963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ddf12b8c5f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb6959f888c4cdcafad4d38479340571052cc216bc7b5a199b99ea55c78ef57 +size 686695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..984765f03b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e18aa9bd3f2661b476f3120e2e6ef4010c1106a9ab6e9612d4e54347cb8c0e +size 735285 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fe46fef62d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21cae465ba11b7ef3309455dc153eb743afeaf460b32e26e388f97317c05c3f2 +size 692115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e5bf12426f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f80a60e7100bae43c92b16577579e96b8cd7352baa2a5d94608fc7aaee44e4 +size 934469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..22652fab8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264f72f944755996008e47f53d0812a05b35b8c7c81e25d2a57b297309ad66db +size 891245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e42c8657c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85dca8af108930ad1d877f191685ff6246a6b92d7b7ea33891563af550948dd +size 719631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e2638fe0fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8198872395418de13861291970dee4f9b3b91b86a9f0d3a1d6673d4cfca5ca0b +size 628751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f968ef8606 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000c1893adb71464776f8b9cd165431aa5b27207cd9674644834aa73aefc9fec +size 672517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f22931eda9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb19d649e1c119cab0efe6facf6cc60cf24906c76828919db2cf282bf2a57386 +size 582080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2f66bbc6c8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5146d844ea6f1ee2f9b0538a49c3d7a590f79f26f5b7dd0e1f3a2b600b048a +size 719899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d1b824fecd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703dfc5474ec4e44dfd79571a556e07fc81243bf1175c263fa2461db1b3c9955 +size 675841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2fc01a3bc3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d40695087e023c8ea48a156c49e413c1cba37bf9889b83c1a7c45f488282faf +size 725221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f5321a3e39 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc11a58ebcf9f12df2e2b2fa2f2fd156ebf4cf0efc4947d7f09f4eb7144a7d8 +size 682051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f4c4c53850 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4b768ca462291bc6b8a711cc0b303cb0dda3f35467b77ea85e2c7910288541 +size 924701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e39a9eb570 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2629166a70821f7ba679d50454d7b49a552d910ff0dc43eb21314803465bbe6 +size 881229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0dd2a24319 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fd3a2d9cc84b305d5ac836beb380da4cd1d06e0a748d9beb67c6defb12c839 +size 709913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d6419b3aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38950c11200d6217bcee5f9c32e67ee28d2b9bfad861c950099faa5184edca9e +size 618687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..aec145cc8e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ccb47f60ae719fb370fc4d1d13c367ec7ed4fad468dced7ba758abd9a8ab647 +size 662797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..907b4c614f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064c65eb5db21e04159c3f3dad47f30f45641efd2f29b8e033c592da269a39fd +size 572016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5091fa95a6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0115f0c3a93821aba53a2c09ccf8835e6d26b163aea32a755f57bb30fdab8d33 +size 871783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..03560a935b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3676b1810062de66681ca996bd435b0d926314a6d4a00a02dfb6062e75918a +size 770889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..35eca683bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503cec3a68f6d10bb732ab0d91a40c6262427f9defef05f255d8dae2e7d7bb08 +size 874297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0e8e6378a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69df4c80c7f12ae48df0733f7f2bd1cb426c66d3c22ba534dd443954ebf63f58 +size 770935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..652a799481 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e95febdf238175f35b6ec8702eb762f6ca056b8059c138cf8d525d75711486d +size 938217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b79814b884 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5910fbc25fff86973ff48a7eb052657b44aeab4f1e147a7b300e73b7435a1e +size 837421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2d99480476 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96ed7d9767b39ac238152e217a28de453bea09206b77c5b1fc45561f26cccc7d +size 881947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6d37136f76 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25116a2def5c31c8127431fa70eec14d41b31d769912351f2d11e056c500fe67 +size 844891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8bf28aad8f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d70b82a943138689e6b2c4621e312ab9d85b0a1e43dbf835875465590c71e53 +size 873019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1b2541acb9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0136cc88bb8620ed99a58d1c5f6d39f1da44a5a981406856f2e5f461e2e88ce9 +size 834875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ca2ba03264 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dff8e09fe16b8a86ba56fdd2db43ccb32a0453f32ceff7760b075ae147b66f2 +size 993079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4b904921eb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89547eed31aa0ae26f3c76133c0a7facf560cac0486699ba60db9ea0fa55f137 +size 980789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..91dae94d34 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c49bea9860e49cc1604d0520834d7c720b8e033191e6de13a6ca3d802695f05 +size 908057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b09f328412 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9e2852160f726add861d22de9ac1416e4fcd69311f3876f07b0920ed3f350f +size 806669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2ed80494d2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ab0f71e6f4d318be7270465b2b98a21fa87e13e2f9a9485a998c1b4404d9af +size 983015 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..29e3da1860 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba5ca5c6e24e9f51ac7b25effafef710ea031201f51da6a8f960ea5109ef61f +size 969935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2e2ec69cc9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b9575177f9fc1a503b261a572ced2b5455efd2b96e8eb216ffe3c30e0185e3e +size 898289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..80f290e956 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23a53f571b0122350e13af8716301edc5fbc47cdc0d384df076788a01e71f25 +size 795815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0735549822 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5db53e27fc54a92f4b81fa79c131197b5997a70fd804bff139a98b5bdbc066 +size 744829 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0ccb179591 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54b7ae39fe35399b71b52e5fbf2b312682dbf9fb765f922890bf37c8b1bff7f +size 665105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..68e191e1ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead7a298eafec0a1344cf643fc32579d2c7483106da064a94f4f60a50d8d09b0 +size 743393 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4d96bebd1b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503deb4f714eb098a5644e3277a41c52f0e7c5553e445c81e3baf4045d946c17 +size 682759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..cf9cc0f822 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64fc54bcc1104fd13f1cf1d916220a554693c52a2ac1e7e54ccddc6906ce708d +size 954811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..661af1bffa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bc54ac17977addf18a5b110dde7eb2bd132f58c60160bbefc708ee3825932d +size 845283 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4cf03fe7d1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd6c6cf4407da1ffe4c98f034b34bde5b8da907dd1c2f384bd2f79fc3542b52 +size 737507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e380f3e00b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914b80bbf610dfef23b131c54ba61ac162e742b7a6f3a4845c18c2f5464ef59b +size 617965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..83cf25a8c7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2adc89222f3bc34c5e1d8740155f6c8e9478abea9a534d91f3d25dc9a56b3a75 +size 689801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1e6945f688 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d48453e08da438ff10bb3ff6210fca451c199b491b8d2b4bb6a0b8e8c4deb0b +size 572428 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bf2901a88d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4bdf37fc7e8eebb11040e176a10f6416520713aa04b2ceb55ee18216984834a +size 733975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..80582b98af --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec3d81ad437d63084af21a4167ed6deda5a267783f99a187a01fc2b4cbd7b8e +size 654991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..56d2bac4c8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033278deff68d14957ff772025211a6b1721e82d2bc9939679e9a961ab0f5885 +size 733279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed1158631c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1afe012532be6c2c8e38b4ee4385957dc03de4385589f4bc3f9ab8c62068592 +size 672695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ccde7c8870 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387065c7a86bb371d2c939f9540fa4f1a9007aa2edbe3c6d40679b2e7a861723 +size 945881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..82f319a394 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a1d2200bbc2417e8f3bdaba90b02c8814c940915255dbe358e6cb85c5ad3a8 +size 835169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0162954452 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60b97e2222ef159b63374771f23f13bbd6c8c682ea62c4fbe51164021af5a86 +size 727739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..09aa1b4308 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3124dee29c9149ad96e7a745fa389edd33f3ee16ca3a663bf720aabcbed60d +size 607900 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a079342eab --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ab31620445886940e13d4ded753d7fadc5470bea69c841d30881a43f8070e3 +size 680081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6778e7f5d5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78fde2170b4ea59347d05d819a17f2efb9f72cb7bf10a118c8f118f0940cb1fd +size 562364 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5a817920b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec061666c5ee86a6f866e91f19684577f9841e916b0b58ff30d72965fbb6699 +size 748217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b4bbf9d923 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8224d1e76934b45ff2d679a2fbed05a77e09a10a840ed659b4bfc8111d64fd5 +size 678755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0cbf9faf6a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a31ce5eaf98311c126ccdc0cdcf25b61b66a4c5f7cdad6b29ec897de2e85c36 +size 746781 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..159b7973ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c8dd8ca5f15f9ab6e813e71f914e1668342bdbc66168f5ad0dab3e0fb1e8b4 +size 684669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..36e4513ca1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13047d6a295efa348d22992ef50dc3aea184f6c85db872524e1a2c8088d43322 +size 967425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5487bcd795 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f139d2e2c7ede44fad50b5ef2c27399a7c60ef4f5a2c8fd26965520c892c8b10 +size 929627 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..88fe44777b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d6b04b447a13ff254750749065072089f914cd80ea59b1d1e72591e363ad39 +size 711543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5780746f1e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b15a36ae2562a52a34821117e315e3bab2a528de9894d1fa2b3840f8ca15fdc +size 623129 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..729c0963e6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3627ef804ee4ff1b7c540f3e1972bc7110a7b918873d211d0e3ca7e25efae27 +size 662947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a1ffa66103 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6988909c29cae2315e67d31c97a3ccc06d0759f5d1a051500cd0c4bb886318f9 +size 573892 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..addfa124d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaed39da4315233f51f76fd1a399c60b137ceeac3bcfd2e0e3174369d22f063a +size 738153 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5cb071b2d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9cd0d06f4a484f0c727d1c431c6c2495f43f71a73a2ce40af95afe7fd66149 +size 668691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ce7bfad286 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f805a9444ccf4b55307cfacbe65f2ae514dc820d1619a5c3b7087935268d2c5 +size 736667 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8786c7c32f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ab2aae1cad0dff03470d6eed7875b7a6644609fa239929cd1b6e1c731d9a2f +size 674555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c0de26b6bf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97e3618469295d75e7ead2bbf1ed829d573f1fb34b786d73ff0f3508a4b3283 +size 957657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6076b02119 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7506b7f3c1b7c95294c130f9a8abd92a1b893f230f533ed06410109faf3562d8 +size 919613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..75f9610668 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c706f3a47b0a19771093e48e2f9872a24911fafd7ba17dfefec8cc880cb1fca +size 701825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d0ed4bbdca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341e5bddf50fac7d4bddf1c6b3b8ac4636c7bc9427b74afda28308ee053937e0 +size 613064 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1e59cf1ea8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a08067b9d81ac09839abe87256366eda25818fa0fff089209fa31ada698e81 +size 653229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..96dd7fad89 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50266e0332c90cf4bbbba1889d0d2c3ac2184f09639b89a51fc3a849401f8d14 +size 562990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3f66a2660f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2ff11f299f7be0229a38bc59be2cd1cf940eb1431c65c26b22966f7504f558 +size 821413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..221fb8626f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db69ac3ef7dff616fcf78d496768379d6c3d08aca9cf3bb13fe7f8f3d7f2552 +size 727327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1d406c3d0a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d926b7b77ccbe7e37d41bb6106640fd0fe7208ccded1a72b772974dc9216a51b +size 818895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..35ce37dc5b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f670c47caf3aef68a7ac37da2cf8c7605eaaba7e4f5d218592bc639f8cc2ea40 +size 732653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a8e1d6915a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eeb67553b9dcab4258a85ee4d8d3b9d9eff0b117a21895453d02d98d8e33471 +size 889625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ebb51c7656 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9439db399324ac614fbfb0ad06b7fed25df6c0ebc7f6626d35074d5d71abd4f2 +size 794107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..3eb254dc78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1616a2290be44cbf13b3173f0a57e5fa9998367dfbb7d9c5c1b4308efcf26d0 +size 863447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ceeb5bfbc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea62c6a0363957788db3ccf952b9acc6bf67bf2195bbbda1dcd1fd3fe8904db4 +size 819631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..775b1e8e54 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331ce8324ef797710fe84b4b5ab50de9dc8000b8644ac142fec123a910c1aa56 +size 843961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bfb666f57c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c401f6b26967d94a57ec3d4c946994fe4a1ddbf9f9e41bad4c29d09d5987825e +size 798665 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e9a71c32e5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6105ad2b225aed6ee5ac1e02f73f9074c2343b827c9c2a097d914d41bce71b +size 864807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2bee2468b0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5b89e61856b8e7641d67a45a06fb3ddc038f21f800f20be97a99fd956b625a +size 785477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fecdd727f9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31d13f59bfedd94153d7a1954a73d3903ccd68cdd41285883cf7315b2e04cf8 +size 858685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5f8579407c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc1a2a3ba40602415d319d5502096d28f525e3f2f94c74194b63fbc96a6de597 +size 803527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..74c4314ddd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b596d12d3aaef8dbc63e70e38086639a5cdfcd7f179b541feea66c61860d7c9b +size 932019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..920e640046 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4997ab621aaa8b2b46277ca7e8fe823dcb898f0336ad2f737546e7e2670de7e5 +size 825057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8f2a71c5bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c751176e950bd1f99cdc54ba466cbf7b4f58c4ee48b74f5a45e9148e7df4d2 +size 854675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..be91e9d30b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858af0e04ad5aad3f9f43d5c7a1672d969427f7f1c05db13c712e1e7625a0343 +size 730001 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4fa419add7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921beb06ba464eddbf76aa8e5c997bb619ce36690521adb306972e60a84abef4 +size 802921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3979f23ccd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e87a268c50321021214441aa8de9c4127f256676094eb9545e198a15113cfa +size 680469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..31fe70e885 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7147236480f3432eda82e86475a3aba009bcaf049ef3f6b41f50e3e124ffbf37 +size 843051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..49667dee74 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc892f5f1d7eeb7123ed5ef4f7139a114ad550f58cf4d38e1e38efeb2f4714d8 +size 764511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0779662c2f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015fe2285ca75bc603a9c12104a3c1fc285271dd432322a38a028d2619c6e620 +size 836929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bc2a50b162 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8079e6e171ece96b055b306a14080f754f82d3faaa2443a1727ed15855e5ba6b +size 782611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..941ca81ab2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148fbf717ee665f8c7d035ae1b4edcf8b3d4378c037571a4ba62762dd773ff92 +size 912533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4d8cd82d58 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b3b7cf1c80dfe593856f91b0becbe79c56c0c13e576941f4d760ffe171b7fd4 +size 804089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c12452bacf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd44760fc27c828874b78e0cfce896f0c3731c778cfff24e54734128e0f3d1f +size 835187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f8da7797dd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e1e4c9c40991324b982ab31b91680231846a8cf844d1e69e07db2a1cadf5df +size 709035 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2774378f84 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d763658ed2d4c3c2596274835ffef3d475f2597b99294de046f2434e54659c29 +size 783435 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3bbfd4b642 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85406b20d29a090e458b5043333075a32dce0b3edbb482a4482eb385f64f5f0 +size 659551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ac17062640 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7409121c7cf8c5bfc33a62dd6801421d6eade5bf7f4ae65afc13ce7ecd39105f +size 868197 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3bbba8d760 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684352ec9644dc1fdc345cdf65d0444993d45d3e62b4413eea91491d769d420c +size 804603 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6c7884449c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b771837969701bfcd8f5c0d3976563b8f71e61c14d02b1bfdd0aa1d3eb64b2e4 +size 862863 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..346b9884c1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b00dc01485177a3fbb469d20ad20b908da62d793d4f2ccc27572a575918417 +size 806225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fc8bac09cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da95f2bc2c2a7d653aaf77625b5c8d120b61f6f376dbed9099ba5e0a719d26c +size 947445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6c72b8a880 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c84b70727f3656f2ed92239221b4b8ca1bf9bff096c7918c490d2234a61026ff +size 887645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fadbfff5c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ed2c4aca80fcdb4aaf032503c1949726451a1f1de8fdf533e07b81cd7e61cf +size 828709 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ef338805e6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd2fc6ce5d9c8a46a16c6f6cc94dc7eae637b1e69e46ef2100e05f3e03e0acb2 +size 734425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..627ca0667d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c6206aaf77321d4e5bce2d25af2e08afe0be74b61a02e69c430f35e78d9a0d +size 776069 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b96f86f589 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457b884ffea59b1e9c999e3812193f82a72934d741614dec9d53ee10c8260c59 +size 683117 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0c4e211140 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75da475c9f645024ccced7b4e17829fe6ccea352be0f6efb16e8e3d580530151 +size 846441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..41cad84355 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46824c1b026eeb6c1b0364a61dea0e20c4d6ce9cedd137e0946d5b0912d58182 +size 782897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1baef1d2a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d9f3afb7fbcf769955c32a9a812e58a35a36b88c69ef78a2ade5a40f73f32c +size 841107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2a60f40c99 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51463f3f5f3c7750293bafa4b12f63f709f349b656fcbd64318441ad06b4e4f9 +size 785309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1f119c5654 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d04abd7b85ecabe8ec220c076da0f50276c46ef7f511c76bcc42135b64871b8 +size 928797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c801421caa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f37fbaa7fcade02802157f840cdc931f33d13887a57a63192d3b839612e9e9 +size 866679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..501f579d50 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e93c78f18200ffe200db5d1f22b6da68a22c13b32b7a30de235de8e93ee068 +size 810061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5c4443db6a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770fe4d8c0cae6b8c16e1c51da09413b9b1337d8a92462504f2cd95d3c75d154 +size 713705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3d2c33364d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023a38e584d24306ffdab7ded1eddcc4dc51207f510c9008b457a7fa09816005 +size 756583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b6f0005ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcebe26e3cde238463b90739dcfc68537792b78e333f37beaadf00405269e5b2 +size 662151 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a81e53e705 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d19dd38c7067107b4f6e689a8c36fd149284466eef0ca3e6708f524bb79bfe3 +size 804589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1074328a5b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881c435185fd2cc603ad2dfa978be03dc732e321d969057386f7f08c78f151b4 +size 700685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..a31b6ff4f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef587dc4d37cfcfd2cbd1a2e7d067a36538c1529925fa953eaa302746e0326b6 +size 802121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1a38a5802c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a53da7ad37b4885b49c8f54d5d43bb974029bfd433068f303a3f437f0854a4 +size 700141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b8b0c8dfdb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7a5f39b759fd156aba9e1f121c9596324dcaa58878ad66c2d95a7f62379fab +size 870975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5edf04921c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e7370337d27c8500213db5f3421f3ef1c6eefbfaeb8f53e1f77fd67d7b5081 +size 766379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f551fd4845 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ffec0dc1772efcf21d153e3e9d41b2747424226e5dae238ac9fa4d32d40824 +size 815395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5ff673b238 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:176ffb5cc383746b662f09275e68cd358737131d65c59f4fc85710ea532ca3dc +size 774489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ee5a3291f5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b82d4c0aaa3489c8592298191cb7c8fb1fee0ef86ab37f8529b933239c6632 +size 805677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3d1ac01960 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d8927684db009750125a485777baf1f331fbdc9494c5716f00825954b30ae6 +size 764475 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..30d997f488 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4be942f2717df46951c1d4ed180c923ec1f1d4b8f64b9a4f0d7f65509240b96 +size 918583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..735602c1dd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38196a9aba460cadb700c0480a445c426af65b2d0bec1b8d91108987bfe7fe8 +size 906935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cdeefaed7f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a81f7fc2e24f6c22ca20cbeb235e211cc2fd17378efdf1c8984b9874a3917c +size 858623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3692dce625 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1296d0dcae0eb1f3f17ca5b5d95e5719eb752d17998c005c514476b8e4acf818 +size 753781 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b4bdf7ca73 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87137178b7b77557e38d76d1ef87b8f8ea355659e728c3a898b2e002428bccd3 +size 907729 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5b84b33696 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35622445698a96e2ef0fa37b65fe6a5941fad33a9a457363c41024defc209513 +size 896871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5671012059 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734e0d3804db4032e229e916cf8674f51d69eb3eb39d097740d68ab5e69cf089 +size 848905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..315de92364 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34dd2393f0e96d6c7bb18e1053f26e6cd7b1c243bb1a66ef3ce7cc23cc729ca9 +size 743717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e1bf5446f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39667af9801caa683e5559b6007fbb049456a624e3fd145ff876158fa1791e61 +size 705459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fe6aaf0ae4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e24fc3e71466e77e84d85ebebe286f2289c1673dac4d1077c77a38a789f57a +size 664511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..41f48bc696 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374867c7f363b3e74582fd75fde84e6df8e55ec1cd3d11cf66e1e818a7c7b3a2 +size 722275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d92051d28a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3b62277c8af05dba9b3d144b913227692bd956250049da634b64b9eaaca7b0 +size 679797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9d16567ce3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420c54bab9f2085717e67a778ff6a3b81839e515fda5f4be0d43f14505205681 +size 884163 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..b53a58e2fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c395cbea417ec5617a35cb396aa7b0ff02c21dbbdb8cb6d22e21b027971c075c +size 774981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4823c27bcf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522883a639fd430ebe7d0712dca10437e529b4fb771f6ee2e5cdbe94c16c577b +size 734497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9ab8bd51f7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b15c7e0b08a0a47de02625c2eb60ec945a79ff35c2a912d7598e83673a372af +size 615792 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5ca94a8375 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2afed980f7826b90e36c370c3d59a8de8b461aaff9ccd215379b5c77c95f050f +size 685507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8899e1e9cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae96e514e67eccb8c4f191540f47f0c40fe2fad89aa9647839f3d6db5000528 +size 571094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b0fd9edc85 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a197b809f51d0fb2bf8043fe19fd20b37cc4e192e7d744bed4e8c690460be97e +size 695395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..20490332b4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415e90d0de80ebacc098f9754089bccf2b3dd5ec37144b2d7a0c6f41fc898d73 +size 653607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4788573c45 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9990600ad6beec68d0e9d9223e774086f562f305a4c63315ed4caea64837aacd +size 711423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..037892b566 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b917fc1fcaea4b4bbf5bf2b9071df70bfbe7ab8c88e917d7ee63de050caac6 +size 669685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5067d3763b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc7c26ba878e759a625a566a1f7e38d1f6cdd41b9bc0bf015d67b16201ff7be +size 874445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..46dd8f8741 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74d77f11978075358c719a489144def9ace47786d55eee0dc8310242af7925f +size 764867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6b3cf1d7a5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48993b87fd967776d5f0a54333a932f8ddd68210b335146109813d7e9a51527 +size 724777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5e435b1054 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4445eb923cb63cc17b1fc5b6c973d177e5a02965287d45dad2927dda62fae608 +size 604938 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bbf8bba7cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35936539d09467ff75952b023bb466fddef3d2d759c334ffce3bcb379156778 +size 675739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3835e8e333 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6661652c6ad82780a35bfb8056903724af63994c8c23ee08890b69def109df1b +size 561030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..277d4b977e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0dc755ef7759bd4fa7f91540a74de49a914a28ca5b9eaa49f6d66094b5402cb +size 720639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3a2b36df76 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917b453e081aee0941bf92ed152fb5b032951a86a1f1019f763889a8059209d5 +size 677371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f9d00e9913 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69576b0cff58c305184afc732ab51bcf2bd36dc8a55c75909b2aa17160e9905 +size 725665 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b450313ed3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9191ff72aea70ab8c3366d987a5d4e96a7f4cc387bbc0e2b9d0e957a74904175 +size 682495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ebe278c92a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42ad16f2f9f725de1cf449bfeb8242a45b57e260e3f0aafd58aebbb5b534ba5 +size 900823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..49b8dfd5bd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0877ae419ea8a402f248b689adbc42176a764979ba49a751b8151ceee75d71 +size 858437 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0ba156b01a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298265cca3999606a8e8e35d48aefc4d2ece52c603c15c401e7f0784dbae4e77 +size 708581 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..959068af70 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cded4e0e69a64ee6844b5aa531f981c134ee43c0a12050625783a2809f884470 +size 617650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7249480244 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42781ad923c9710f65a94a1d6422af12d315d40453a21bbd9ecbaaf40ea79fe +size 658653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..39a977cc3f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54315bd0131cb4689aa494c0e336916039325aad440c63ce8e39747adf80785b +size 570338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cc27e1e5e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b010b750c012f0a5219770927ed9b2ec90641bb4e6c441d87238f98795ccae85 +size 709785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4f41b165f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d742edda854ffc4cc7d1cf74df6b5dad80c02c5748d3fe2240165b4fb1fc0e86 +size 667307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..84f9da6c27 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4a7b0c667ea650973629777af4b8309312d2c70fa71e9501c43cdf6b1712b8 +size 715601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..03752f80f9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc9a452b3a9ce872846a6c34fdd1220271d37df203597ef85bab274b0c1349d +size 672383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dfc10e1d3c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbafc3d875a3a215937e6fc96e16e08d3622e2f1bab3087c824a56574cab079f +size 891105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cff204e41c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1a0206469f16a36b5660cdb7b8b9ae65451baef3dd31766d2514a3d47542c9 +size 848373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5c7694faca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd23df85bbbcc6b969fcd02e01a51b556af6729c7ce991a406059b19131cce9 +size 698813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..523c0e6ab8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac902c53d70bdfb0c31955329b0330adf021b3ed889b3d21a763cd0d0899d13c +size 607586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bdb8a713c9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9969f8cf43e100bcc191216a188d2932febe15c549334fe534e5c09c02d260 +size 648935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d221956092 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6449c92f4eb56d9c451e51d4747af3e01e1df122bece4fc41627726f99bed660 +size 561852 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..eb045a1135 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e081fcb0588d27dc16829ce3b10c1e81d0b7c70e392a8dfb193f4e9d5735f101 +size 881649 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..c1deb55307 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ba3997468d6572bbe0bdf23a269e8f98e1c759ab8293c874ff7611a11736e9 +size 780657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..63eaaab940 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8008dd640245466cded628892ebe01fcc04b3370c6d6c3f08cba6c980eb5670 +size 884065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e730a55f8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de41b90dff020a655af1b2169ecfce3ee1b3d515e6ca5ecb5aa9b00a9f6b399 +size 780703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f2b9226433 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9fe1a49c4f9580ae11fff2357922f4e03915be1def584ba3a25827fd20121f +size 948825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..566f4db016 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22db866b6b4fc2e311c36913f7e568fa4eb960b896eafe05190d9461fb711ebd +size 846351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7f855aec01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4e0ad641e7d64b8cfed979da947e89be61928f63b84f96fb5fe9b4a60e0f7a +size 892603 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..dcaceca69f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b2eea52983a3946c39058b22de3b20f180e58c31bf68eec2b11e6f3f933b21 +size 854659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..430d7475ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf8e878c40bdb6a7a4136b628173d1b5b212ef3745b05207beca488d8a351d3 +size 882885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7788c4e518 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac62cdb1c5d40c38bc91c17c75bf08513d77be202d727cf997c7ebff225b5d59 +size 844643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bff893b9f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95fb075348f8de02f8568aa34a7313dfca5b473b9aeadf31a57b5936dad04b03 +size 993769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4acb681fca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e136cefc004fc33600cb196f93de9d88ea75d300b45b92d093a3dc1f97fa35a9 +size 980689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..20670c905c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7abe37b92d382540016d8fd52126fd848a1f76ce8face3f39056438f9df071 +size 916147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9232968ee1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f23e7867c756bfb2e533718e7297f9554f8c68c47a4f46fc643dc9291b2f875 +size 812737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..97f1683259 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c4d2eac078dc78561dbc571372b80d354055e207cc3adbb4e7ea840a815224 +size 983705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5901d2e80f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bdeb8aa334b536ec494c801658006c9bad260f158c4b467e003459e1310e7c +size 970625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..47ef9c4d59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da6daafcba613ca3ad2904cf1e64143c479aa13b787c69d868e708284b2ffe4 +size 906429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a2795f8244 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9e6632cf86d9e7ba26a23cf037d4ce622e5f9efef57b105833f1c376404eb2 +size 802673 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e6b4426741 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d96870bf4d308837d3e38b1a6611d2c546a288e15f630d465e223d9fc44c21 +size 744731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..450255cb01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c78335737b2a399ac2ffbb64328cfde2c9e1907199b4f232a835796ad7aa84 +size 665795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2954f4b6e9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3423376055553858ffaa333aa72de6320c7584e086eed9bc1b907df02edb09b2 +size 743293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..361f8a588d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fbe770571377f6e1971c6854fe85c8133282b84b32668aab32c7430ecfebecd +size 682661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b374cb8596 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b883e36349c4170552a912db83bd8b07e64a3d6f9b85c866409e9bb23f2252 +size 964579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..160c6cea08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3b9fc0854ca2ef0f7d912f9f8909d5cb80cd7ce919a7dc89f24d881cbd00dc +size 855051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..103774588a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6bc98d6967b5934116a7e0e700d0c4cbaa0f71e9c78e04be790cb46b86c30c +size 746635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6d9c80382b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c885b6e0291bb89010392964d4a4403ee13807ad62f99eda60dd202932a5ea2 +size 625859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..dc04b69725 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352574db9f007c4dafd7a5e4a31b414b8fa03d8f2c372b827082d9ccfbcb3e8a +size 690097 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5b3fd44c83 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91f6ec40a4d9d1cea6399b09b340aaf09893f26f32f227c61fb224f169d3cec +size 574302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cc68a79a77 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a096044d60f5bfaea22d34ecd7d6158390d7173bfa96188a87a195c51b159e6a +size 734667 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..073655ac66 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca34851359f104ef344c6ac1f4440dc8510fa0898fe0f68911319a209970b86 +size 655731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3c36d34120 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6ae4d38bc9dbed3f174b139381b32c11bc28be6d664d17145373cf9938ba8b +size 733181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ece10543d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01607bfd485cae0e58d8abb61a68fae3af8146afff9265757ead93d6399b558 +size 672597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5f0599c85b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4ca670e2165e5082ddca2fc58ee58de7a460f24e81e0eae63f62809c1a2d64 +size 955649 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1e1ca33f4e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ad7513a10660e09c304d2a792386acd82e1ecf72552094977cbe34482e476e +size 844937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..104347442d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5c02e258c5472d808fe5ca7e0aaa0ea83aa41990ed2c749fcdaa944a6150a3 +size 736915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..85a64326fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985f3f09e0b439d771a375c28929f278fa7b04a641d90db166dcb93247fa2f7b +size 615004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d898a4f0d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf793e1bc9b583491a6336ff9fa7b4a966cba55f0b7a1263bc107f75e46d446a +size 680377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..12e9c165f4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2213d9437ccbd45dc60dbd2aace2fa62326b2585b398e33fc9a71cfe91753eb6 +size 564238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0d173a6ad4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572433553f9d17dcb6bef9d5b1a999bded7354cec29b668d17acfed0653a56e9 +size 748119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0b1a8b199e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0c4208bddfe610d5af00475d6d92f5376c618bcf2fc7ced22d596759e266c9 +size 679445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..450cae46ee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f59cb7b7506fc9f7df8dcb615a38b3e11da6e0bb50a103abe217ed00874a4b3 +size 747471 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4f261ae5de --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80442b197f2b0aebf7e90d587f741cb926d7b249cecfa5edf7b774fad503b883 +size 685359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..81a6121612 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50503dc052929c6a3123514e6044b759ce66391db861fb1ddba831146d25ff1 +size 977243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..a32f6bea1c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff9bb7356674f49a44ef8cbf23c1a8e737993837d6b90566c8d40a86c5de508 +size 939395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..95888edcef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17631bb2624f0f7f2ac3165730f9b890f524122b62ff7f4bd1fa574a9093284a +size 720669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8107e6f21e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edb9d36e970d3316231723699385b58044d5e4fd884f9dd745acc19848e41fc +size 630333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..00e5da6097 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61ebccca8f085b71f61df89aa92b35578e3a8c716741eb8d43bfdbb50f74e521 +size 663243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..be6b9b6b79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246f1e9009c2e152e5242f6d514cb1df0ddc69ffb29d28791ac24790bde8738b +size 576014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..785c256cfd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0a0d985368abf20c21177b63d70ca896821835a56125da089e6fe1379660ce +size 738055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5ce3094c2b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e0fe8a24c82479d7c9836adad8d631bf60be886eea51c0edea75329b1d4d55 +size 668591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..33690b5973 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18da8f05c826a31f45f0801c9bba16f7ee0d996c4996bd384db510e6c058656 +size 736569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..99baad41a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:018e2f62ba54c2c5dfd99cfea919fc05a410e9cd501efb37eefead6d843a7245 +size 675295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..305bae1615 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f1acc737b77091a04a853e60ec1fc20a7e02e78f2ae1427da37c5793549316 +size 968313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e9d674b11c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc81aa634d8dad8628b89ca001733af10ccdf40d900077e2b1ffd65b627d0dc5 +size 929381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c7df0010e6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01484a5a88a099e4f09f8e1f52d8542c4bc58ba441dda83158f1e13ba0cedd27 +size 710951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8e88d82183 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7a6d55025c090a56d63acf2af4401783f0af380534e9a65176f45ce1285642 +size 620269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8b5096f86c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84aa1c63d4ab46cbcb6b2473190c43f74c007b975640b6b00439d37b9871aee0 +size 653525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..622f1a2dcc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75baf94a3213cb4e4e1702d157bdf27d6c227851c34fb2d39e14c4120438b5f8 +size 565900 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..dd08922ce4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b33c8ff5895baabedf4a762155bac722f44708d04af0ba780bc56f863004378 +size 837743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d3b74c8f4f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1da53473b1bfe894d359ed640ffde527582e085e4b03a75c74abade03815457 +size 743607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..114f38dd81 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f793f88545d0caeca3843d0e62307a9dbba4ac67c34e6eb1240e733c6f9501db +size 835175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..9d4324d5c0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:076157622197e26ad2d6c890c762d463e5fe9f4d85d2d6cd918a7ffdccef94de +size 749723 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c422138e13 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e69e7883f2fba6c559f1682bbb1416d8ff8a6d7dab6369ad1e85c4b6079fb1 +size 905115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..0626f2ff4b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b965b0ae79e38a087b152021f90c1803fe463daea1815442c30b655315e22a90 +size 810387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e48cb90ee1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea3167899fcf266ca1d230a58fae4bce6fb393fdec8eb5f7920f3577660fc0e +size 879727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..73d3c21dbc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32225adf7eda6a8d09e361d1cf96045ed4ae3fd432b1981aee93294c79101788 +size 835911 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..cf9c16d3a9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab736402dab6bed96fa02a9acd2eab58fd148c2dac34b50999b095cbed6b5930 +size 860291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..86c452a717 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1bd5ce77df370c30bfdcf04000dbbff37a23449ce2c907e624f602a010a1537 +size 814995 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7cefc8cf29 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a66569514f2e3ec73f4e18ea0623003c71e9443feac5c61a8581786d6bec70 +size 864709 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5caf0c6e44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f761f4180bb6d50cb0bd44c4a23e826e69d4c3006fcffaf78668692e8590378 +size 785379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8fef5c9509 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133f0d9f042c27195290394b22c2b685a4e9c97faff7556243b04efb631014a4 +size 858585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1cc9faa1f8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2e5e301729964ba04f659ff2dd34a00103a648f4e43d5d64fcec549ba42263 +size 804219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..53b401d166 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e955e91cf9a131a7a90bebcd6a38986da2dbe69b0201ffd2b0b83c10dbd4bca +size 948299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d1b6ebcd4c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb364889a58303fe46cf822ab86f2c9e68965e57f8270c66d6a1b5c4d7495854 +size 841337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7333815b6c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f450b36ccf53debe4fc6bcb083ef386347f53b1b598e9d75dbc42cec226591e6 +size 866811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..749c4e7cad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fdd07211bc18d57d8100ed726ef1db85cbb063145c1dc8b3879e0c0f73b741 +size 748747 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b3ab7d1a6b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e633063f1eb8222956b29905c28e2b197e6a3b8d3f1ab7b42104f1ea71de57 +size 804303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5f0d34fa77 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496a1af72ff83f71f35ab9159d2c0923d635bd28d6cce5d625178621e0805fd5 +size 682689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f41832cacd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e679ac557dbfdde4858c2c29a83c6d8f1b84ab76952afe92f5b3f288e7c2f83c +size 842953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a79b5183a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcac0a2f92107569a5dd2cc449192b1f2188558bfaef1178b04e4a8d9c208fe2 +size 764413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c4ace8c009 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132b82308bc055e77238cd71f37e347f3a395a12f2ab2a7f3621fdef63cbe2e9 +size 837619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ba199c7258 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d945e644154174d9c34f5192fbc70de60fd07fe888c796a8a32674ef18d8d7 +size 782511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f37b708688 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92dd066ae03ffa5ef48913d304bd8f7269c741360040ca4a0e59ddfef84c4cc8 +size 929651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6bd4ff9db0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd05a2298ad4a56b5b6a1fefa47b49fda0672860b94f960e11224ab21c48a881 +size 821209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c614f3a91b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2bd19aedea586fb7e126c9a2963b4a6de6e0040d9115e19c2b79bcde3ec3717 +size 847373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..276dab418c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b9ea5edf84cb42215fe5334a1088e6de21ca16d19322cc27c789b3c2a0b1b7 +size 726991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c28ffa4bdd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36db7aa453ecd8a56c96d030cc5b5a6d7647e6d00b939d171b3e8beb1c723a66 +size 784865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bd7ff82d30 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11ef53662e190d0757a4baf7aca85d559c3aaec811e2a3b13a49fcb03e92056 +size 661721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..888f613faa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f4d5f52fef8a86e9d73b7f861d58e003f9474e878671388a74060e738e4077 +size 868887 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..07101274cd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04294260373c72fd6af5362b9ebb2a404e835ffbd224ef780ed1e9168d9001f +size 804505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..40e9d81c08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1a3530268527195f4124a06c8a32dc1715b37c5eb198aa36370975e636fa1d6 +size 862763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..99918771c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011b7c0ddf6b2629e854f175d83fdd9b4e16f712f110a948041b59bc9e3861a8 +size 806917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9283c35bc1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0e7845fa0049453bf21058757404fae06864d98c9b674f0519e22025a04a12 +size 963775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..03d840d372 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b395f21c8c722c980e3daf1d18aaee0cc9589dbbc3f823aa581ed2af1f1722a +size 903925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..19f031fccc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1def3e63d63e13f4be34665ee1eb9bd67e54cd6719277e7816a7f6854ff976 +size 840845 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2e4e16c511 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e704e72d8f3823678b2a19a23ee09c4c4c6b04adef0c33ee8fa0f478d0cd590b +size 743405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6054effb93 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028f4235936e43f9695c69ae6dec645e6813bc24c2096a9c2228afd49e51cf3a +size 777451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..de147bf5fc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69a120097cb1e3a0d9a01af67be6e7d3c0b951901753f55cf24cd13c7e7dabd +size 686127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b3d5275a12 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b63c718eef59094a761dee9961072da217bd62e63bd17529446bfa7caf2f9f +size 847131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3616f20bce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387ca0495ff4adf4018137c93ad457bfb842ccf79288f4cca7298c618db28c30 +size 782799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e8968657bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f52c4dd864f65fafc3a700c94277a25e3777f6034405e2cabcfe2751631ab34 +size 841007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cc2e591bc5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e48226d4615ec5c1ded7c62cfe5507ebaf122a9ef6d8f9cf5632d57c35778be +size 785209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4c0255a638 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67d416008b976fa7ce9d160985d57699b8b5c8292050418f9cd0de60cd07a1d +size 945077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..242835da8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46370a135980c54dcd2da8057191b436a1c34c59fd5658c555ad149b9376b852 +size 883007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d81e2f9ddc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a3f16dac06c261589177bd2361887cbb8be848a1a1f32997f328ddba43454ea +size 821409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..02c2759062 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b970c3dee38638b90f3c4f02021e9431cf13a8e17e5f5ae3d13f83416aae5a88 +size 722685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b14d6a1219 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e5c2420fb0406fcec24bb5ed6dce777b65d7c787948bfaee97cbe72d1cd827 +size 758013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4e21ca64c7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b2b442d42a4a51094b935e67abe7ad1401e61df787830f07fe5c1020240783 +size 664371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..b631cd8f40 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45227a669725a96add8b530ecc4bb6c942e4668eef9269e313d7447e00457f +size 809375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..e3a36713c5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e44f0722fa0532bd7ef26cdc8b4a803c24165aec175a8ae50eddc67feda08b +size 706259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..17abceaf82 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4dd98cfecf5a90c0d5273ff7d4b2cd542a79695ebae3474c6e241e97268e7e +size 807695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..053cf156e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c9fd14fed41c7cf341d5b65a50c5938bd83534c94d9ead3920c315374f2716 +size 705715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..94741335d4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15fe3497953f03b97049cc56341089d078507549c2779f9d658d8ca5101600e +size 876549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1ce3aab9d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779dc6ba7db203542add9a3a5fd817bf098e12e006806bc9e69b6e6e0c64f5d5 +size 771955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7fee434e7a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e091f075486ad4ad1267b7c3e8fe90044f9fde7d39d3aa0907c8748a9a1bc87 +size 820969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7c1b79e2fb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594044c57da818a292777bee62cba590d00e1e4f38e48138caa8a99892cf2292 +size 780065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..f3300951aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614cca29067d2362f2fe3a1ff9dc0c927a10e69d47e36fbb6e6d9e3995c7a052 +size 811251 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..5424c3f5b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71aa13a19d74e8f6648b0b75a9fbbbe1b57b32480bc050842af4ee9a069233f7 +size 770049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..74f5e0de21 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6633af6acb72b1c30b9880f8519a55b0c8418baa34b40d5e7dfeae2fc9bb08cb +size 918485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2edd35eab4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0ea90dd85021df09a0bcc0b638c24a4de2dabcb10f984e619aa21778bc31aa +size 907625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..32b5bbebee --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ab831c93ec3badf073ce1b6e32afd1507bd2343a58f900706470ce8293ff37 +size 864641 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..965d9755ad --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed96f247c477a16224f605afdb7fdb63ab32d0178c26ba3cc2d26436e32d15fb +size 759455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..38ad5c9739 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955289664c21d5f847096109aea4a54d088c5791adc9d054b43d81c2e3366e92 +size 908421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..acb93628c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a0a72b7e83261852554e2dab9a37a852465c1dbbbd21f38f72b7cfe8499d96 +size 897561 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9200e8aab6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d569b43f2cfb80a8e8b8f7462603222ad90d92a2622b4a9e46e32895c981b428 +size 854923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5267ebb8cf --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cacb73ad20db88f53e83596ac56b385e53870a2439ed0cd177e66698306727b1 +size 749341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..58f13562bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3524f53524be0b4b5899b631c6ac9f6d8a07131275c320f331296699dcc34810 +size 706199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b7f221e24a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0662cf38f324031cd3eb1b9f9eccb9bf14c7572f562ddb17349efdc338a6f651 +size 664411 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..326b338b33 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7795c2076ef682364d2e8c4940a5f272bf52e763098cb6bd940af64e80570cc6 +size 722177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..700f4bf312 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ab26086c9ae08a3ca47fc2261590c4392116b129bd06c48dd44a179746a9db +size 680489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..18087644cc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fc475fe55411a499e006f81573dab33d6de72087276e4ab3a8e7798b115bc0 +size 888949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..70149c1c22 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf01815e2276d118aedda8b107d24af36559f197fa806edce65493aace3a0ea2 +size 780555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2eb4af2c84 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191c2ea5cf2990aad431540b01c44793b9b821d496c798b77791aebdfc2e2db4 +size 735631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bbecfe02d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7b92467cb5e6c47c35b8719627005b3e221e3e6a9d7c02320d5b752398bc72 +size 616088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e48c80c79a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7526c6f9fb6854537acfd2b197355396c03b00e99aa4b39c8590f394b651336 +size 686443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e71b9a7525 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba17ff84a821840161fca0afa5bb8f4c01c363d287a8bf4ebafa3ebb6bca1145 +size 572820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..968e38303a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ff42fed0a22e842b7ee62585fada951ee7ed05739c699327ebf29c0cfe9265 +size 695297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5f63bcca01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7afb689c225b4df88d7809b1cd9c9da88f7d84ed982ffafae4465e2e647eddd3 +size 654347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8cef6ce5e6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d3333186826922413b70015e8b3cecb6799a6fd26fa7026a0e8506621a42a3 +size 712113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ad3c8e4c72 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfe5c86811ef2eb1653e8439165712a68900ddbb75435dcccdefa199e313f0c +size 669585 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..724f85ec47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1041a4c49ca48a942518c980736cebe14e56207c57e5117e637f006844f7f7f +size 879231 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..36c3f5a74c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f26a99a34828128eb83709a620e38d994db6e241a644557e19f24cca0a392cb +size 770443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..38933c860d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f12753c544c6e3cbdddae7ce8baa39fbe8bbe47acd1d6e353c909a5fcd8d38e0 +size 725913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2db71d757d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf78c49b283a6f91e5cbae1fd1d07981f958766f2933f070f4013d985b356f7 +size 606024 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8f8d40702e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea7babadb6df4c15ccbacc445a3df5b3e2c3648f3305ae4b12ecc653a51ec25 +size 676725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3a064082e9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b15359fd76c86d2b23660297251a989b67eff96034d0768a0a9eac850d38bfe +size 561966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d32593d129 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea2c3c82496cea5406350e8ca496264b0b6de1b3fa2c6a96cb10dc9387884a2 +size 720539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f7f66491ef --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a70fba502b28f70cc36ed7342f87f5d0822aefb9283d1cc1c995d785666aa11 +size 678061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ad7d83f260 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93c9392a77d8126b7917f20cca13322f927dd319f1048c41ea349b619965bcc +size 725565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a38cbaa8db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad35b27ea33ae112110667d76df0bee28daa31b6ea46d6abbff5116e39c7054 +size 682397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..96f9a605d1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e82c0e3619b4acd369604e0a2a999e13cdf601ef436eeeb8f170a5bed1ad1f +size 906397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d1c55b0b8d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b703d20cd31e274cebd8a90a4c80b5bc6c494f72b5b80454a8aa06c7b227d53 +size 864013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b8d6692771 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c6e3344102788c646191e96a85e25d312cfda2839d426e87bb81e28236716d +size 709667 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2e3446df08 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb973220050ea2b96ac9b9715ac492d8c0b473ac0fa493e74624d72ad581d83 +size 618983 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..34b069d0ca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80548f86b4150d6a85d770a79b992d9c33540fbaba82f969ba8aedcc97fc29fe +size 659591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3337371b45 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d389fef16711fcd9b8db086679b45457016f72cc86742c66c1619c4e862304 +size 572312 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..60304ce446 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e3c5891720436790b3cc935ba2f3fc4ddb9e057cc9c4dc40b289028d200649 +size 710475 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5e41e4d472 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f91ca915b1c0f8a619f00be5f622b5e1797ab6427a0e094a0cd53386fca73c +size 667209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..75e3fff7bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff471bf7da48da4d83a42ff18666bc36463faaeb63efd898903c9db43c85a5dc +size 715501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e45e2c734a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6554910891869f5cf0cae6a92fc53dba0882540d2f22edbf189b61295c46ff5 +size 672283 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..93ad75fe3f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5151ba7217ca5013213754576a2f4f118add9cfda50d1de88a713973fa057e +size 896679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1cc5f007f0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b340a1c3ceb5062742a5dc103a850ff1e3ab5d955e1ba1f7d8fd547e92650060 +size 853949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..07e3d6a8af --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a04c4714dbd9079dfc06ff931ad9e6481ea0e6cd82df6ab4cfd3d695cf88176 +size 699947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..16c0b33268 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c098736b924978a4d45b1c5200bbd67b228f7d6f1d3285e38a975d14ca47560 +size 608918 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1ecfe10f16 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eccef15926ae69aea4dbefd15d68cacdd290365814b9a1977f4e1dff46d65fc +size 649873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5c01b00673 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f16e19218449947aa47e9bcab16bd1c5348a7348ee255078c11492d26f96411b +size 562248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..edd8a0295e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25842b0e864ad012de6dcf2fc3ecd82db63e5f18986063e0df6b6291e8ef755a +size 819095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1eea864f13 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ffe01a8d9bf4d3cf178049289f23d6e856f11ec7e724cd00929b9e6cd3485a +size 732655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ed4b0ef285 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45519ee498b95c06c56ea69a93cb63c1cedb7df7cb288136a931d638482d4eb +size 820919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..67f0d335a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca94044322ac9985243e0038bfdec5918391a6fe2d319932e05250cbebf21fd +size 731715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..51d7539957 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edeb24e5037d9ecc5646f34e197cc46c5d808cbc24a9b56f476e349108ee3b1d +size 886319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6134919af6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d01ab89e90de3b935817811a796a1c307009d7fedcd074293263cf44aea7e16c +size 799287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..799dcb6853 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f56aab26121f5839e18f2e0c3cc7c247ef6f4b60ac605c82dbed9ba198515a +size 875731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..52ad9f9196 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b0bf02522907a589863295434a90c98db22cebc25017b5629a404479ca3312 +size 832655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..03e854b6bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3ed118cd4fec0d6025d13648f9cc6eef5f5759a9a9b1738a4b7e3a480ed836 +size 854371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cb679a702b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbe15a685736468950488f4d0431b22def8036db5c6b4c7c30aef6b7683eb8e +size 811541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..049dbd4bb7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f245dd7a482e1612b2c67429413c6669c4d42f29c9d048b2ac16e1be590857db +size 952379 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ffcdb8e0e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa516c241ffa38103d282924260fe8f9a00b677242a100be8c76272f26eee29d +size 934661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bf56c19e89 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0abce1dfe9d3a3745e1707e555a635b5841d63bc28ce6c16272281363c969aa5 +size 905195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..37f37aab59 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15defac5264fa23009c405558d33a10e191e52ba68bd1f6e5a5c332590a823ef +size 779783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..252a775272 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e0be1ac9771d0aee996dad4ff4fe504c05816106f085c2645347ab20f11935 +size 927859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ef237dfbe9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0a0a6acc177bd275104af30e25e89ab91a6387d434faba5e20420e0631b7377 +size 913497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c3108d7bc8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da94009640e035ca05a5bc5ef59e23ef7018d9e218e862c8e8fb85cab4bf40fa +size 883687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2ddd76d9d1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79aa2d99fc1ae57ec33edfe253645f2f13fad7983b353743c63416bc7b244e73 +size 757237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1635203832 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545635a6f7276a0d7d84751d11f521f8b345620a0a47f4e7e955ee0460ce1af8 +size 770975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6eb18afc7c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334e6995b80922c686d27ad8ddc29e3e76f28ab248de3ff4cc1818093fab3eb5 +size 701513 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..234edcf4b3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f39a551af91491dea09c441a88d3c362294958f882061ebd071e4eb1a7292fb +size 766431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9e86b1d742 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f0084180e29f856ad804a6f175cb8d8ead4ab32e0bf48958e0cf7cf67a9b52 +size 720599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..44484c5f73 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603ac4bcb45c89fe1eecf87bd3eb9be57c448e4a8ded26ca874c59192bdd30f9 +size 949779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6233508a4a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74e1d9effb89943bb596b51d386ed870916b7eb022f88011316c358921b2f47c +size 833245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a3c1d1c58b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ef4472f2e3057e94866f291b805b22543baf9fa19b0490e5b2b25f113cfaa1 +size 772387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a7dae91772 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e6a0c13fd47693127e11ace57dcb6db7447538c115737dbc9b38a47f3b41efb +size 645543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1cd6625422 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d878ce117b3abb75b062b7efc23f54a0a15eceaed3f1cca938e4294840cae8 +size 731733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2740448f30 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d093bff696b37b75036455838b2149641ac0292ea3d1675f602cf1cc7ab4f090 +size 607848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c8bcd726bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4376f9e7656671e9e12d385c3723bfd76a24e89dbc66ac3d7fb5cd7061bf789 +size 743695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fc31b96533 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f389d1fd591c3a3b028e6779fffa07dba314742737d3c0fe29af1ec5379caf54 +size 677191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f847213d46 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5859a006addcabe0722221fd97e8095a2cf152204d361d07847b3bd042eca5 +size 742209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..39c16920f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79af1648062379d1ef1d98f86928f24a1496696320a9bbb036043f2ae4be9241 +size 696375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..495f4656e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1576b630e052c51d21756ba80d458fe50607036975941541f1b83b2a0d9e1c03 +size 924569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ec35d525f2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc88fbe18f8f9d59abafaaaf1e033eafb11629be6c60fce60c4ddad03448b8a0 +size 812921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7640ace037 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727a8983219f7dbacb87248dc570cc1033a2feb26cab78bf94869fd801b1d87d +size 744907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2f4601501d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0b9a392cc66054b401a3966858d22541d10843afc1413d0ab6128ba8e3e18b +size 623047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..edf93d2251 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917143bd9a3d39ee905f235376ac948d9ff9241b2ba0a33224c1e7e0e2850cc7 +size 704255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..03bb2339ec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2a1dc5d361878c7e051cd7544432760e732e55d6ba3a467fda3dbf1375b235 +size 586142 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7fb121eb01 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8102ca2f3bce6a9b84021d1a38346f638622a54de9b505c021068c14002aaf99 +size 771799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..09778f7ebe --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ce3e63727864b6a3e76a03fcd080de887f6f161c1837dd79570673bcd2d46d +size 716987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..733ceea5aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8b379cdd673722218364437c644f1c7b555d1e59e46c3133e6f1dc00dab3b0 +size 769227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5da15cca70 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e791a2c6e4861b4fa4f8d1aa15f083c51d1942eed3225bc05cc5b13448cd69 +size 721669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..7f89184cf8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d3915652cf86806e6393dd6056615cd6dbfe0ba2d22797c248053b222e04cb +size 946853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2960a3cfe4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5276f340860f2c1442b10276a88f76d4b8de6d0e551378afbe879781593c57fd +size 902297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2f1cddbedd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9ebdf0ad9524bae05c13c03e5a322f93e36a4c6a50a8cc88b3c2b7b6cf5262 +size 744893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8ae53e057c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5daf8e6d84b5adf193dc7d0e1621322229fb0a023bd0e703d82da5dd994826f9 +size 646317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..badf003e5a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2a50970d23d643cfc4ea5f31784f7e86ae1075cc1a52c348819ecbe336d648 +size 703401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2c0e07728e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422152c64d710e774401173feeeb67a8fc00fe1f7ea723a92d58ca12118529d2 +size 606896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..efeb8b3bfe --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4083899d4a1f397acd5ffda16d66c570cae463a159221c753126ae065348dd58 +size 746491 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fdac0b291f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75af58863f4ed3082a185eae4893553952457a209a5b6b8fcab64c394d5aa487 +size 691679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..34c359cdf7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb1964c9a716bf61efe977589ca0c09c2fb47f45ea15addf6b6b671a8c3c0da +size 745005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fdd6ab1cac --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6aec882acb2e8b9b2773942cb6e860d442f27e1f5ef903dc32f4120f186af12 +size 697445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..2ff779e086 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f46442ec1103bde94a08622b47baf3de296dc46f001e8f12702e57519bf36fa +size 926281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..536cfb58e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7fa7fa999256f9bedd0e80b04880a3259305ed57d388caedef5c8fc0d163e0 +size 881971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..01f5bffd42 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2029237ebf74a24e69fc91a0f248113f1e3fca7b5e57d6cc3c3d01408117208 +size 718943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..77952de234 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5966e3e3ae908ede30bed64e953beac63e83aa11696f55f96a4c43b2d5c42558 +size 619529 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..749df16015 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b7da5c84b65a5f1e682ab4563bf278a8b3e8752b3571fd43c81f78959bff103 +size 677501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f51d632e44 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f982f652cdf01fcd41b3cabba072bd79a3f93508bffa982d435a2143f3a0176 +size 580996 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..41764e034e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0336e89ca94a98f23b190be967fb09174816f2ecb4e84f0f753cb982e414d4 +size 743171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..31b4226836 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8700bbdb3c8d2657dcafcee2bb0fab97b7f1da96dea980ed62203a3e8e3a0e2e +size 642671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5f86cfbc95 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466ea7e9b5a88ae708f8f48354044f4ec25f23e9350b327c33b58af757fec544 +size 740209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..77fbe0dafe --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111adae64a726db2147710b9676a8f885ecded8e07558bb989f9c3e146b3d72c +size 660083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..6de9eb2ae4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf6cf02fda6725269972a6bda918eddb5db97fa66c2885edf6c5b1450471eae +size 810543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..704ef20280 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b61b83f87fa7fa3a504cfff02914d6557651bcf14060d91b282bd2b2c2d9f5 +size 711129 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..8706539fc6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1abe18141c1e0dad939f3c4e62a73e3c526a34758eefae1452513c8514977c41 +size 861523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..7836ca42cd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fd4fd8c78e00533f743ffdc817725974e1325bfa91faa18d5ed612b926d51b +size 799355 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5aae22653c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533aa7b75a8d340d16358dc3d7a7bc8818814333722866b8bf6ba1791e6f23dd +size 810859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..62365e5ab0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398e09a2bd3ac6296be3f63a1dcaaa6a1da4ff6ac53aed1668fc2bcc05f64f60 +size 756485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..442f52c9b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e0ff5bcddb8c6427922d452cd24bc3e9b4455b7b46c72614443b942924a0f3 +size 946947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..caaff584ea --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b4359d3eb906daeea1b2293faa4939ada223d3dbaf7eed3087590a817ce827 +size 878865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8539e8c360 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7fb5ce726f65a631395c8ed82ad7396ccfd1e69dfdcfc9089a031854441e0b +size 938259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4feb7ca2dc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2657b380dfce56e3698b9e3a2a9cfb53a920648e42d9ee75af8fa157a532390a +size 898741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..756be07fc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a37ec89e0c18755f3ee9fabc57c347ed6fcee80be6e20f5c55fac581a12211 +size 926247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..09c940f52f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2065754813c45b83b7da13b6a61d8d2353e8d5f69a848df2e819fb062e12ebf4 +size 819679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..56349695bd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df90cf0140f2d7dd539ac35a77b14ae868928536dd6f2e947fa8c2a054812cb8 +size 924481 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6281e1ea68 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7e14bfd1b6abd9c274903caa68c554c2c534d4b64f36afb050e9795a6dca49 +size 832959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d1d4725bfd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f0414fb3a33d2485c0e182d97ea4e5d506297c167c0343e24fcfcb21638ddb +size 877711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5711d2ec07 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a051235186e0e6263923d9ba89adfc2fb0b3e7934ccf0380fe938939cfffb54 +size 777705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2be8b913bb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe90ca9dca2d71b8bbd0ff56405b28284f081b98df7d6d3a96e261d34e895a32 +size 897861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5453eb1ad3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315a4b5509dba7b9459770205a21c79d85878274f35164fe9658c9522f133771 +size 832345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..901310e3e8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b01a0f1a35ddadffdebcb55d58c7aefed1c0aa05091bb9417b75b014ca4a89aa +size 890701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..473f3107cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b5e9fb66aed2847db525c18bb3356fd43938bb57ca65fdca6ed29bfc2dac5d +size 851183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..906a6dcd1b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8da6476920d5e15c90f8367c8e63b9ae904c0b0701ef47cb5bd22ddaae341a +size 879775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ad6c578dbc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31b670f8700803e1b4aebb2fc85f1425b6be64c30ab3f08f16439e31c65d0b1 +size 776315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a180569985 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95098327133d67be107625fb00b0098931b794e3b3327d2701313c28ee318cb8 +size 874555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..692ec5619b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f20cec3cd7c56503ae56776c1445fd544a0a76b9e6fed30d06b6737f8755217 +size 782047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3d891678c4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59d4b1f25bf67f4808a59dd628ba176a1f4282d4a48c30a1a99d053b6d508ff +size 827835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e72d8284c3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdb361dc4d3e19e3a4c243b8a316e28770ff582d9cb579d381c71ef0a77ecb7c +size 730147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ef0436b842 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733f982a5331aac4daff102eedafaa420728b378d9b2993867b675f22668401a +size 949597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f4383cc080 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32927a1af1ff4b1121c7dcb04933773aadfdbcf537ced6e027edc2cc5e54da70 +size 901445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b97af7b464 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9575f5b1b497ea7a0d623d0758aa58483fa9a475b77c10afe57485403d3ce327 +size 943029 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6e83ed4163 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a57aaaa3723beebb6e118bd4fbf2e07e0def5190f451c50b8629421690eb1b5 +size 901833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c20646a9db --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3fc34f2c042800aaca4cce5485700e7c095fd357560f07566c98651774c55de +size 929389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..cc704db5f6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10e2b72886efc607a1243179ced8fa167d5e3b4d7614783f748c354dff5724a +size 866383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c8855a10f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b321d2dbafa51a560e9ca45b0f5f7420ff00946a2edc846bcb7387d8fcd05d +size 896149 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..611a6d8521 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293b593408c31789a8d0feae5ee716b989710fe3fc5951e28aeeedd0c90dfc33 +size 788593 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ef6e843964 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858c9d5b1985a8610471d10fb8830da29d403dcf65d8e5134515dd76cc6617a3 +size 847701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..52451cd0da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278c1a0b0a28167d3b890f1235a9a2999cc1baa01a4d5149d3821c6672a6766b +size 745967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..037378c7a0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839cdcc622259de273ee851521011c5142cfd33430352a0f6ff7857075cac09c +size 901447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5cd5eb8a6d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cacd38e5aa66126a5055915318b8d7aec6aa78bb1b6a604d7fa96622e3a9abfc +size 852507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fcabdfacfd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7d2fa123265576eb83f2970a6e3c3cc76228e02b775c8e0d5d0b97bf6df8a1 +size 895077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9db2cfd049 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34da5d98392f710fafc1c45a123f977d25a57cbaeb3b6708fd9208af244395e2 +size 853881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..51e05e1f2a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ce0ae208bc23d30bc3345821dcd271ec43bfdeb89e876f18842fee779f2e1a +size 880155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..733b098db2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af17bcf8c72f3ecf34cdb56771f5c294e87ed2bab3bea4968e47e012fb91b44 +size 824597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8c51bf1ad6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4ac297a87015d0e0f1a65469a9f796eaf271ad3bac4758675f24695f603a04 +size 850663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..328d1df284 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efe97f94e4cc2e75a82c6d05bbdadf33a7a3c882162d2eca52ad0b341cbf0bdc +size 745625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ba22d7e7ed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bf4a3fae2a524b0e5bab90bbe20c127ba1164d20789bf543ac3d85d254270a +size 803005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4736746d6f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3386fe9c74df55efc63e7ceb59de43e419b0e02344472f760983467cfe32e375 +size 702999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e3dd1fe87f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92fdd214e53014b7b9aea4e3cf9750ce0138bdfd9caa0df6b438c8b1c2781d60 +size 756883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..392d79268b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bbd0c1098200de78b2f39976f140b5b9d34c829e5756c65146c01e3757a453 +size 649477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c0758655f3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b34545b8cc76a426d46e5967232b2762c5571964ba966a2526b0fa8d44e727 +size 749481 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..2e253bf9aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b33daafe42ba36a0d49e06e927abb0c25d9458115243f56b8ab66defd6ca33 +size 648389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ebb4c3c184 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049868c7168ff341b7f1c6ca856528adfcb17045f0ee81e06418e7179924d48c +size 824107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6b90526dd0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ed2ad453f8b21cfc15f14aa607488a6f8548c9f41aa6d30df0f7995980a8d5 +size 718329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e628e8c904 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a812c07afa8f957d06232acb09a8d833ea1b887143d69669f38931038def18 +size 769367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f60eaead9e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:758ee7a3e09fb7bf349d7c76d273572a0bd267e60b30065eabdeea25923f8d9f +size 723775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e25e7e92d5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daafd0416b80793f39138272a5a0ebed4fefbb27d4eb5cba25853d0865bc6867 +size 759649 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..587cc61872 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2acb833609f489bac3d57e1755dfd72e42cfda520262e0fc017c1d0e397fb5f +size 713711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ff4efa490c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264484d75982a57ee26904bda18f18081cc886ae40d318f7d6eb46a33919a8fa +size 840685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0e2a69d728 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d91d9be3268d5b79045482fbf764ab5b15562e0f5a71ffd6c75f757e8f7f25 +size 827853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a651366925 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a2dd83e63291cff0129dd00b7f8526b11145ae6ea597a33edb72a8ba17fe78 +size 793947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..35dac846ce --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5040930b137d4b4602fd5b7306cc82c7c9d32fbf434aeaaab4c78a02d8e0d251 +size 693249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4e3d131799 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af91619199bf12ce472a29c11c935624b8c113692e689f700d15c0a751b7db2e +size 829833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3b0eeddd6d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74ac61499c232824192702a485eb4a87095a6e59489c3e5dab5224c79b0281d +size 817789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d5be71a9c5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb823b9dbf18745899997bf0953ca6010a3ef14dbb4c54d2740c47af83652814 +size 784179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1007c5ec5c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517af5a3e15e62668425c253a8111d977b75e36f25d78a73e8c1755571eb1651 +size 683185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e904bb5ac4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3769f1c7f15b43e4c34260a17d1ff58684f838c741e53e613fb09f46e42c2153 +size 688785 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..27d6f5a59b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5acfad3aa99439a8a0ff47281a4252e4932baa33b2167765a2f9f2917a0849c9 +size 661847 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..91eece20b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3f4a7ee45b5cdc22d5df1c1d17bf91ac78f23c173a6c51cdc6a9217996cc94 +size 705453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..18c2cf1a14 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c17c09cc388dbfc0f248bc15c9d52b8d6f69bf486a05d9e386434034107ffb9a +size 678565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..33d728f61c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826054cbc6d5c4822c106d8966d223ad302e02cdff6d0cce99c71b93478f6522 +size 822645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..00dd26cf3a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34f1399be580edb5bcebdae118dd74d64548b928a9c4526dbdc7d06a7c1a093 +size 722737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..46c2687ee7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683f51a27158fc8cc77bff532a0b1a1ef06603a7e7e9399969d9d66cf5d4b730 +size 719993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..58f84bdc14 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04b97c36993a81a243fdd5518118abc80d9626ec944d2945251898d2ed2342e +size 599314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..dd8f8b6069 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccea6f6c1a924dde814450e5e1e47c38576b3a5921245e5b62486d6d61d724c +size 687479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c6658a0592 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6e01b76e16af3749729ae8ddf43867214d6785c75fdf5637b1bdece32af660e +size 569170 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..56a027c484 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369d0dc9b3bfd3d05e9cdf25dd1048ba74e15a85483e0c138a9eba25b66c5b9d +size 678721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d4220aee82 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95849a2c948fac0f0d631d8fe8d44a5fdf882cd388a4e20ff7e0d80604e7e0e8 +size 650993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..22951b0096 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdfe9eee5cfeae23de5373911b308a4044e337426d12cecc258893a310c321c +size 695389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..537fc1f6b5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a212320b5fb4b6cb30d332e66326d110951bd5a8019fc57113e3e3cc99948c +size 668501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..1db1b65284 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ed57fda5e9346013f0940e167ac72d7572fdc277a94709db7d1265fc444ad3 +size 812927 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..d9077a205b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec432011e3dbd5940009085aeea73aa7daeeabd1750132355f32b84b3dfb89b +size 712623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9cd1ebd995 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e235d7d61d9416c55844c867a9a609d8e10a5d34095ae9a81d1a9cbb6634f6 +size 710273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f0adf55259 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b43cfac45f351e61c3824cbbf528a33bbc83bad9f855b019e1036cc287d35c8 +size 588412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a6b809b19b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8b418b727195c3080c65bd4c148e9f9c1da7328252184ae839c37e7b834fb8 +size 677711 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b7b33234c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d65ab27db390f86489f902ea1c644db1ac81170f358381d0f49fa41662c4a0 +size 558316 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..66d9a1f222 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1146d3548265d8bdb67e2ead997f139aeddbac154b405307259339c8694605ab +size 703273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f51ae986e3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892159e08544b6dbc602ea08423fe50b1a61b302f7f23da00da2782b6cdfe2a2 +size 674609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8fc780e650 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd15f78077809014a66b53d8001865fcc0836cbb6df267fa52feca6c8756dc1 +size 708249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..38b804c80e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b911f70603a0acd0f4c454f1b1e7b05064641f3d924244142edf5e37634f0dce +size 679683 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..bae9e53f37 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7eca4cbfab926aecdc4c96a6ced5657eaa24b1b118f76d1270d7144b66a45b +size 837725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..bbf09ccd9f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92ac8d1a5e3b9cbd0b7fb481dae8f63308994b998f539b91a2a266fd34ecb38 +size 795143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d35cc082ae --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ca46457dfbfd13b10cbdf6b66c49b12fcee28cb0490e79f627729b65500d23 +size 694867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f90c307aa0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04330520449ab1665f10e731b665702d034be1864481e7e206fd6c260f3e334 +size 604380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3eccf492cd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db702ab1aa69bb59b8685a65399bcf15d26a0711925e01ce0a27d07fa07ac10 +size 660725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2af8b71163 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9fb66147eb1d3dd5f9c0f5c4fbfba841f3cfffb871bed42946fbe4f4a5d61d +size 572460 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0daf5d819b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13f4859821cfa742c95c7e69ca48244c9ff15e06273a6095fb4d64e749aa7c0 +size 693159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7e234514a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3118fe466cd034be17476ceb07415371587b9f883d1a07073a3b624255a1e4 +size 664545 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7c364bb317 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c33a27c6d2da67ac25c8520dcb2560a82aab41a8f8a0573d1fd39ad82a7df2 +size 698185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8d1d5f4a41 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9770064157aee642db90098b25a5994a714aeba80a3b5760635ef4ee69f6f09 +size 669619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..5eb3f33a8c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42bbc2e4962bbc710564c002d3e93b9e6eb014b284fc5f7d9dcec90ecd20e9f5 +size 827957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..1a0bd6363f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94af3c7e72700752757ff9a905ac565d07a74b3d094e68e0beba1827cc4d4144 +size 785129 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..eb43f8eca5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e34ef3a74573a344fdf8ec5afa3755160f0a30e492d681b204b5bdf7cce909e +size 685099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..98a085790a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6849a863715439b21a390794dc3a44be9298e73177048c5f06eef437cd7a5a11 +size 593526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3743b69ec0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b216525947632846d70179c5caf77a7ad7010d1545fd9d1f846e763051183b51 +size 650957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6dcad55920 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3c51f700c9a76b61370b9c8e6917b9d75f95ff5368051630ac46dca7696b26 +size 562346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..af45ded1a7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1485ad7206296ece66e95d6793f4c5a86500b931904da1747d856dbdc0b3e288 +size 677885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..572e5c8d78 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e57b9efb7bf3bd2cca345045f655c817f6320c0dc3eb2d07860d70bd1682d85 +size 641099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4d251f059c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8dd15fe312a5d040aaa4be1115ef9cc1e62310b42a1c6efe8d760f6379e939 +size 656919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..4ad8c645a5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254f8d13f74f10f168147ffaab02d88048e213bcbe0672786be2ed1489bb5c72 +size 625313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..852ae97ece --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125d26dd4ff2d125740c6b6b188ee0dee15e86d117698f99294fe804113a7bdf +size 686941 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..df418cd79f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a096ff727513b112eeaf0cbd3745fbae4fb131167fc93e829a2fc85fffecf43b +size 639605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed1fcdc221 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca276d02720bc842d65184091b07d4be97f8575f59bc40ac475700ab19e27565 +size 687997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d43971d71e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12825289afcf3726375bd9a943df5ecf46738bb4a52507c6f477b7ff4093020 +size 648553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..83eddfb3cb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e65deaaa6c929a0b4094da35cb4f5502e788489e915e79ed8d73f653838c9ec +size 723271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..78d736c13e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b57668e0862ed371e8b2f953efbaf6ec722b79f20b00ac2e9661be9f42808a5 +size 639617 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bd2f9334ba --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccfb214ec0935d63dc5caf3d17cf68807da5c20665d85a4ca1243569b72db284 +size 662009 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f6f30d0a60 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5d8b036e1c237124bd28a524fd7d541208a07ac522bfc28295107cd1e865f0 +size 590046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b4b5436d37 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de4614293e280e129d74fbfdcd7a217b75c5d9b6cb1e0024579f9abc3039f31 +size 628929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c06e8168dc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b06a9597399a716f182a4d8449acae2e930c3e4ac610baffd03482b9c81049 +size 557780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..62bf7ef3c5 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8122ecc5e15121ffa2ca5fc49bfc07b32a2ab1b27c9fbab4e6992c7ac767b7 +size 667997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d78000694b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8004c8c1997a93d4f1c1a603e93fda6fd7551dc0704892e101701d0bbbbc682 +size 619871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a1e4de09c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68219ad36d8b5e9b06b726d99ffdf6239717a9e9b1abf31027951decdaeb365a +size 669053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6d0bc13127 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51c994f7081d27be855daf81d637e2f4a124e99ce7b76e8a1f8c52edffcebc5 +size 628819 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..c71c787a79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b153cb36aa42ec1234bdd68f27d5dffffd7f77d005209c97a2f4bc9e9fe4c7fe +size 702303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..f7d3c83feb --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346322589bc954d305b4e45098eaf7e91c8812a20fd967446fd7704ed3f0bc16 +size 623831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3f76c0edb3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2257fc2f1b42d14460df70f41429dec7356e44b1c6ef4ae752fbf646a7616 +size 639907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2e9f5095b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6107538ceeac45d20ec46d5caa321d3e3362abc975baf0c73c549b8d682863 +size 570314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..064ddf8900 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6d5c35aa168601945ef8f0ef88988c2fed263eb0f428c441becb3fe91481a0 +size 608404 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..115653e964 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2390adda210942fa10034c7739d809cac82842af0c948feea4311b7f92dd3b59 +size 538048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..ed115bfe28 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed7c90b811d20c5f2ed7da95eea66731db702bb531c5308060586c3c6d051aa +size 688925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..80d13db944 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82279f7508189f8692e8d14e022eed46205a3f2539c5afa40fde33a08f0b6f57 +size 640083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a831d586d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d8415a0c3faedc65caeb84dfd3f125b492b97d8aff21129ca486a6d5a01057 +size 690769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d3407a2968 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5fa0478c3bf5b7b32adab2256976601428a1da848f0ece938639f2d59ae219 +size 648241 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..fdd513f42d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772a33b355e00fee7457b8506ba64147f7975bac9f4abd96b73295281868f609 +size 733269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..6e018b5479 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e59da5af42368d1fd40eaf9dc98cf7b1f8b16cca9bc277d0c6c4e9b82c23cd5 +size 699271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..34b96644e0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21eed8fbd8186b9ee00a4599b4e007244be1348368176e19a207fd58e221fbb9 +size 646157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..f06eaa3a42 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4133742c2c7fe4dba9a27ffb8c0b593beaf12144702540761fab4eec5e8af01e +size 581792 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..4ca0e73aec --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56423b50746c26031c87a8c2a6da59268f6863a38986f9df2e7f36e1e7659ea6 +size 613100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..08bb985d24 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640b46b4a7905d111d97abe04923a6f1ac75a919bfc691c448b14755867141b9 +size 549526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0a84e1663b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ac6eae4c6fc8088ad1b9dc3971954f42c057c44fef153229f3fe35ea6344e0 +size 669981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..908f91b28d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1823f99bbb6caf6c4c832d8c1110566548856af56caeeedb52567d9ff95850 +size 620349 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1c12410928 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e3a18d1d1b49e856dfb446604c0062a5d36a09634c14d1ae89908e7e9d39c6 +size 671825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b9e9852db4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a211cc1c5a89699df9b0f1df5dc3f80c9a312d94bcdea0ecd6937ee32e173350 +size 629297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..16be07cd0b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61fa7649408d243e721978dad0e39b493a870b6be4a3f3142dd59973a65529be +size 712303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..392de41abd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36828515591fcf79ec90dde5b1006a63c8fa4d8eeb76257264e054db5592c7b7 +size 683485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1487877e4f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f0681054fc0fd2e6c997afc8646b7b1442f614ad2ff8e3ee64ea2b70d92e18 +size 624055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e6d294dd49 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1c35bb0f0989c722e3862701e21c1d1373001bf65e24234f5952e95e759c6d +size 562848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..d0b9f07567 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6bdb4852a65bee9d480b860c55802dc9107f46bac5a40a2a8e010c8290ef234 +size 592578 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..b499f90c42 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db67c8b5359c683d7261b2676467f56ce9fdc7396c477804a8883185c315065 +size 530582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9933ec0fc8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b8afcdb92c8c8073c22021ebc03edfc1a323a4c9d89341fcce0354e7a49773 +size 606544 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..ef1520e04a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f73e975684b1d86533f287144deb118b7dea621d585eda8dad87759c74adc40 +size 572078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..e243277578 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0edfa813cbcca54cebc1a9bd5996e57ee0522bedf24fb68cfba3b20a786ac0c3 +size 585702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..65a8e65962 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff59c85a4edf8dcc202551aa26bf91cc678d73641c71eaab3073e80f3e587fd7 +size 555502 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..27366eb339 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f459469a4b78973911a99b51dd3fe5524768618518956462b7c8fdd33bac5dda +size 582966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1b32f0ba68 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec38f7bd814ce8ec867ff15bbe25178fdbd516518f09cf8194f99addf2fdb5b +size 559408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1c748767aa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9bdf5c5d7a73f1c4dc03746574bf38768f9f7fb767dccbef8322a3c15f70cf +size 606098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0c8546d33d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d73aee00b27587d0280a9fbe7c7d3854ad6ea20442f3718585b56d2bfe5248b6 +size 577804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..ad5342c70e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db36e85a2643d88038f3cc6ede8f8a35d18054fb9869e0ddfa62823f33ad61c +size 651141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..028060401d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d8cc55b03ee8b609244cf3eb2ead1c9593c0260510d227408df6fff1dae7a2 +size 571384 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..e0ce7f5258 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a6eb20903b08055dd7c3f5837f663c7eb748e8107e9cec2269ac7327632012 +size 598264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9ffcd830df --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6e733dd926488d35cbc967e31d8f3c6404be2e267c8d7e5117d3f50630bb21 +size 509802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..728faa2aa2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c4f04fd6a620c475c38d33f203d0e95fa6843d48338ec9109724394837c4cf +size 564420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..66245f867f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63487fc69c3f851dd0570ee5a61fbe7ee7c3f38e16fd54f13b28ee2ae8d49c13 +size 479904 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..a681957c53 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23038a30ab05c142502124f1a9b0123fd0a97f193798472baa9e1d18500ba1f6 +size 565602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..38031e5ead --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26dbafe2808b1375f3f148bc1911b8d48196215d3c8af9a8b562259a90d6603a +size 542832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9d73d74538 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c603f04ca7d153bec49033f0dedf74f8acf41b3a18f13f0a862e286d1a29c9 +size 587154 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..59a2b58dc2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3de35a1080a0a831214f6fd8e81b33291c6e678e7abdcab8f18e5b0244e9a8 +size 558860 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..d2a0cc2133 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d267c7776af98b79530db87a18c287eaf88dd501f9ab03c3daaa22fbf10b2d3 +size 631087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..df69e25e55 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdf83f93f1cc1809b1533747c804835dd6758ea5b5ae5525c8ba7c549bdb897 +size 555598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..c01998a34a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469ff8540d23e82f160fc17830194be8e5df444089952b95a5edfcd5c4a954ec +size 576164 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5bc8d3538e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c30a0265e75e59d84bc8fb6e67abd9758d5ae626921930abe572829147614fb +size 492436 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..cf2ddcd2c6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f7fc7a09836b2bd64a43affa49671897b68e77dc31ec25de2eda553af5c279 +size 543108 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..8824739fe8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3903e66655196e50d5901476337ce43e88b485f3342ffc0aa45a6202093c8ed1 +size 462538 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..9b1da6301a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d78fb26b45b0575e5e5c20ecc65f7ce6f6d24b9dade38955fb7326347ed8651 +size 599158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7415fb510e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9433dc9888c5fd05f9404f5eac837b920848540eb9372961157676c12203d6d1 +size 571628 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..94343047c0 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26082fe3ff5afa8fc0ef00b4d545d913a52d3d327260c943f3be116c3064b7ea +size 608106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..2ccc9c7ce7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8890811a1d76d4ac75989c67f45e33fb7ea4ffb2558cdec7475efacd99446317 +size 579022 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..9913ef672c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b477c07fc9a3b69b67f48cc2ceb8f0d1bd15c8409a729d6fd9bee68f7e95c4e2 +size 661141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..00318225da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d68266240235b4a7349c65831ffc792c50b12365db345581f0624ace9fca11 +size 625883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5381da1bd1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae0ab792d21ca07ad607f38399ca958633d82f0be25d822ba2fefe05acf9b03 +size 582438 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..0a78419871 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf4140445d531ae32a817d0216691d6ce544bbd450e0d587700d9ba10668efc +size 512550 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..6e0f4be786 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa319aba78275247c2781edde874e353746cede4e7faa32c36aa2dd41cec05b1 +size 547804 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..3dceccca84 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9084bf80b5354da393b337791cdd1ca27d80cc13f5152d6bb57e34bf1553adce +size 480284 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..fb88eb380c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c0566c8422c8d593a79b12badbe5ccbf3e7a529b79c740c24d93af51d2b22b +size 580214 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..929eaa7c5e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710c9c1ec7595045d685491574c9ccd1f08bf2c0fe35a25686eba4cd188fd933 +size 552684 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bd94798788 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afd3ec4185351dba0f9ae5ce0db4941404206c25ce267bebf426495da9524b1 +size 589162 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..1480324061 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb27216dce8a61cde23f74d3ef6b3ea1563ceeb067e88d48a030ede44199d1b +size 560078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 0000000000..4040ac61b2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484cc5cae3fa88f0d610cdc483af82f2fa5fd489e3106904affa2e3d64d9ec42 +size 641087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 0000000000..3e59609296 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33094b6e3d94b5b58b9ff14ea4eb11b9ddaba2fdffc92603967ba5171ae5e37 +size 610096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..5f472df817 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03336c9243874a2428f0742476852cdcfa28f795dc4eda30245c37fdba6e8c3e +size 561126 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..36e247ddbd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb23485d07f81bfa7fbafbe3663e34ba517f2064a29c1a248070696c32470f9 +size 493606 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..bbeb253b11 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f1ae2f4c09e22d5bc138c5c514d8cf30ab9f78b9134b5c00dcd643cafdac4e +size 526492 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 0000000000..7d77271a5f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20c30ce21e34006669b71a68be5cbedbfffa8ee777ec319d58644e99e057fc4 +size 462128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h index 48e9106ace..a827cb9acd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h @@ -24,2547 +24,2899 @@ namespace kernels { // clang-format off -#define TLLM_GEN_VERSION "541a9315-dirty" +#define TLLM_GEN_VERSION "d4cce646-dirty" #ifndef EXCLUDE_SM_100 -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; -extern unsigned char FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; #endif // EXCLUDE_SM_100 #ifndef EXCLUDE_SM_100 -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; -extern unsigned int FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; #endif // EXCLUDE_SM_100 @@ -2601,1274 +2953,1450 @@ struct TllmGenFmhaKernelMetaInfo static const TllmGenFmhaKernelMetaInfo sTllmGenFmhaKernelMetaInfos[] = { #ifndef EXCLUDE_SM_100 -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "315a108e45015554afb79e62c3392bf6eb7957c0c8e78bb156e309771eadc6fd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "b2576a1a2c350c483c6a7071d97b7cef36750ed039ed2ba59ec7824b8e176e87"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "22e018ccf438ec8c908510192950ad7fdb2e96a2765a282e0cdde33cd273cccd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "922c9e24d0c5cb9dd57f742b9d6293dc860455fdf6e3051227451f73013a622e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "ac28734d15e9a14cca1c45535278e13224dc193e44abfd43e4493932094ece0f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "2c9126951be17033867ecc39673fa65d3960f123301707dd9d502fc82ee77e90"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "b5c7eaa1c06be5e6a936d458e211088c841d98b9c801124c6c3ad3c7fbd4cf50"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "4aa574752f2a3c71154fe5f24ccaaf8d8674359cdaa900f9f258f10a2f6f6b48"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "d5a25f7db34a74ef1e1a7593a9e12e7453b38ac46555e7e142d0469521679915"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "981d690ac2fc643a945b7a572110ab37ffc16dc1d14ef57334242ea7c9f0a8ac"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "ba44b033f361065b6c9c0591dca360f5ef4285c0199944c402db57f4dc15309b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "8e77caac3052efe358f7b9040c3e598414cae1b8047362d2829be88b68b73ac2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "b6666f66cbdf6cd1c8d9c94110497b0dd19fea68978585a75618e375ff6db059"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "460b926a2fe15e627024defde5000d4dcd8d386a686a9b59dd800a413704aa09"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "4b935840bb64d5d6ac05189a925c7020a4b937afc276b1328f7e94f57529d341"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "9b9ebbcd421767e76b8bde8295458d1978df190a91d37a09820a01b5ac955b47"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "345973d9df1cb8acb8ee74a5639b67a4b0a1b46c69bbe9e4e3b30581366d24bf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "67e0353920c8f59e066f4245f5ce47a54165c1ea32f4c75d405a5f60bb7a9c10"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "66dcaba2e7e23f36ffccf96fad8307519d54a991b3ce88076b4d365bfcc0db95"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "45eeb88486314240c6d2863bfa730a53a2449baed132d7f0deac357501f1fc61"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "194d82ae8ceee6fb25308ee15a5ff440e30521eea251616cf3a804e23092a1c4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "6c0cfc8e0a60272f1ef0430f0799295b15f513ed1a34f641ebf5ecbe60aca3be"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "ae150b49a069f24caec95ff410452c06c5da964519800a5a16207b6aed7360b5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "f76715148e02deb8019fc065892fe254120d6b435b8aef808f3b2ad20ec6ebcb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "583d8ce7dae6c3ff8f5328a01eef09ea6b00d270d96981951755cd7d139e4d78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "e6cd75b3051d838011be950c37bf9511b3099e7d14f984e5e4da5805d37a024d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "9a08fca03bb7d45cdaaddcbd0968ffad92743bb19bcde7a0b297de5c730d0187"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "4ca59f3c19774c33364dd7b5c110347ff780ff49a33ac1ed52930bdd0968c35e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "ab2915e74ab8efbeb99884045cda8d5bbdfd4af8b502c2ec0215a78989bf7aea"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "ab17b909ced21cb90a03611c0fd0abe383cb6d5c31cb88d728e1fee616342881"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "cc79d3595fc5248469b571d44e41890a4f01007ab85cbc10e54f087c3ad43570"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "c4a9c763d77179d74debb9489df8be4b7271c85634206d3e5b38744c395eb64d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "b3287b583ef9aa28f197c0f025ec9a6aafac5f23620f06f66659687cc8869f21"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "5bf675f1f8de2ff8ebbab82c91d66463ec22ea26ac6dd09a904d41c7f9bc665f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "0edf6cbf328da8f19cbea91de7b209421698bb454ef52fcc913cdf9c0ac0fa71"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "fee90bbcc3f684493e632c43e32e9844ec1173e05698233f3040486c07b9ab5a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "3fd9ee67372261bf6faa317d64b7d42149602cd057621d2f3f4fb4fc6a109719"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "5a4aa228748407542a312b16cce7931524ed21c18a8a43a70ae6afda14cffd4c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "5c406af3405e85263028749f34030e03213857e907ef536f05ee6b650d5c693c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "5705621d9f57612c5d13c2c387c547cd5fe6e658600aa912274e30434df9aec6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "cfd2ca395d4dcc4eb6c17c4a90737fe065841590961f9acef680792205de5da1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "cdde9fb776c542d58ef6c8d94380ec8c95dd01361bcb0ecb4bc2b38eee5e9d51"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "bf2a18f76a1fa8a873392f95aac0bfd0283e793ac70bf6de68ca3070629a1dde"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "84b9c1fb37daa58b1670bc7045fc27ac5f78e880235241fe1349b8c57ac030f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "d89e88553b5cd1df78614dd5b457c4df72c95d6e658a1809735ffb1f68f9425e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "ff3cc711f6e3d42a0a23e3ece4cc840d842e5c26ac98cecf9dbc657aa9307c3c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "9d71e29e8d45f8552170f193b6092832c3135a0d9cafd8e70d85a4aab2c27b91"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "ab49886664757f6dfe43f126e50d6edc3539081d4e605ddcd9a54aaa4c05d7ed"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "05a6a198d5228e1aab11baef67587ee05edd9eeb5d32d949ed505a70d5bac20c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "5e8ae51e3761a6c05fbd763cbac734fac3cd7b35eefa9c3a40ee3b0377893c98"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "08b3fdfdc2fd76e6932ade695744a195b63b49ce51afc8f1da68d793013806ea"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "a726e81754df2b09755c26ce314f6b6c4eff1518db950c883b44264f0859f92d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "dac6ad0085152e462c9904a125e00fd4e9fc3888f3982403c5feaac9b3b2b357"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "d82cea43a2fcf0cf31e44629f6119e4f906e51f27ba175a0c5a6334cf86f5633"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "64c11720a31d3b25e8c415c1be29d1ec7246d3f5e62c93fc4ffbc0d90186f990"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "796186f8e5de34821ff85c08267b85bbc3955490b1a49884e2454ec4225178b5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "09da25c28662c7b794d821221e488728ad8e529ed81c5ebcc474ed8e561fd022"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "4c9157042b9dfad0e83fb8efc002648bb6a81512b371fd363d3343166725399a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "5ab7b1a3ab1f3a1697fee0f16f6da937076d2a3d73e1d413e9b934d75a10ff49"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "eb0a482b7ac60ad7d4d61ba7abac7bf990cc19cb576705cea72283405c86c9fc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "d16150b4cc9049e9d76f93392c6f7fe405fdf9efd3d241e911c1a59e744567e2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "e410241ae38b8068728c96bdb86b6c4948bf97a377dea94f0b547d4303674fdb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "83fa81fcab288eb9ab5b45292da41bbce57bff1d450c8d47fbc19901ae0fb1e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "f6fa56ed2ac8ba7b9ae2dc2bf8248cd7dbabb5d9f17e9c682b7658c041d3eef7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "54e21fc9f4c717e15ce11ce323fd2d5cb5d90b62180e1b3600c9cde10d4c3542"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "4334fe757ee089b99795f84b917021c60241fbeaddf5aef68e1f8212b3c011d1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "cf1c165ab30891d00139c4bec1742e5619a5b67cfc87ca99c75330ca4f834714"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "c6330ec97910492eee2a1937e4bf38805e53560579cdc00b39e9c3ce39afbcfc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "61a26bd7b19aa168a6e03661c591ca2a13b3ef331c45aa4b6184621b2f10933b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "98028c5d6027c2bb1e44e6f3b32004585720ecb8e6ba66cd5a91f1adf84b4916"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "9aafbbff0ba65ba395c6e8bada545e2e75d90f9d0d6c13dd79e2a6a349be768a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "03057d404eded707248fae7cb2cf4b2bf1a2c1313983a509e2f462b4470e741b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "25b2f5c567564adf10d310627360d5436eee3cfcea7819187602426cc11aeb91"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "19039899987b7b98699d4f042ec9aa0ce6602f00838d172ac601989797dcb506"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "287633aaac814c8a9f7a8f303f516b3c7794905e2349b408d0d76cf19e6364fa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "157d3dbdf2e170e1216f6e7b889d9a2c0e05e6663567f8f59be2cbe58e546170"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "374157d820b28434da8485ecb87bb3270c7e28be1a99cc5c0edf5194a7e2daec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "c4e3b258ae29fde739416adc9394685a664493d99381391b87c3e945ad0d0c69"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "471b91f77bd80222f106c49a216bede888bbca71b3be1c6e175dc763a2a5543b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "15d4ed858233662704e19a7281b4ca405aab2329ccab4ae48ad86bc425bcfba7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "526dd053a5a8169275b830d848bbc80cc2c58d11154b35d8fb38881ba4735ded"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "04caae7a9052442b00f6f8bb385a5938177eb95b1584a0ed061c48a81a17faf3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "da264940ce28f8778f3eb7512ced98169f5c04c1717e1f752ef9e9418f057178"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "4a518f5a63115c1f3c27f5cae66365c90e7896b381c949e083d9be9437cc2bec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "7495ed593bc4699e5a2b785b2dfc39f078f31524571d8e40b8c0c0ac944d1b35"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "eaad20ba8fe29a1cae5773018fac38ad022fae58967510ceb09fb5ef01e7ba14"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "bbc52a08b793b60b9e6c0b72842c403938ff816ee61e9c7ca81746ccd0870246"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "fae11afdb32b700324365e41ecbea86e835d3d907c542d187e722204607a4c76"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "163a3054564ec34bdf5345a9fca8effaadae42752e9e3a10f2617865f80217d5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "615d8bee0a3411819daf745b76bfb42d569f9b8d2c868455f2c445dee7c4514a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "0a56c2efd1f46c813c041138b87de2919cd4000910977515f61586e7aa794a2a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "81509ba38a99929c9d4e5724713270493f49fe0b7c7b79192da18dff6be80fe3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "622010f06ad6ebab148be4a9165af61f9e5cdd122679ccc5eef15aed1b1459b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "37e0d141b489c8dd05f6307a5da2178da349ec8638e96ced8fe0f10371ec0ad7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "da6cdf1744a63bb11a986581e4590c6f60c5e9baa87a1d80c9d8055f04da3bc7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "1cccc6076f3d1ba22b40b021af7f683ec9217702e65fb4464a21ad694531c7b6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "bec944e06482fc62923a5b2d5019f60c621ac8756cd5753100af96278c9a9e87"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "92361dee4d8b25455b3a1eda964d463125264d57e8551a99f085cca9fc1856e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "43913d8f3f04bdde4aac1697334f4e82752723dda2c76648fd04bd30ab155388"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "21bd9e25790dc2d20fd2670bac424a58dea61f5380404a668ecf7e2863999168"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "06f84045ffb2476910bf4af10300f2a1b4dc4a3a72779c875c0c2e16c02dd009"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "dc2bd5780e9fcab8a62135e9110090f8a96093d847597e4bcba327b09eb010e9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "486ca526cdc0a4d4a34401cda2450e0b5d41f092dcbf47cf79443c736420d7f9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "c7b442e1b93f1ffead381357d52ecdacdf67e66f67b2566b4f592ab0b5bd1f5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "3d63a3407d26b4109569a76eb0a17239b85936dc15bef68af8022744fc7955a5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "2a9c564b3a53f1bdddc96163087df4fc81042cc1246780f9949102611453101f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "8ec7806392127e85440bc19ae7502c5f70030165a4b60be51be3657f70511dfb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "fee4152fb82b61048ffb6b7c789f9755050376ffe6000d11fda4acfb0a7b1549"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "d74bfd072206b8a28f8b70bc0f604c60cbc171789d1b504b0c8dd889675f6809"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "27d21c1ad4d845c759944a9a5f5b901480b2249176a09b1a8b5dc3ae6d2f0dfe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "379ec512a8ef096e09034478d538b5945f3ce829780a52e9f08aff3db730849b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "602c720e02506436d2b7f9e6b03237ef9ec316072798d29d75a26b9567f9bf0a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "6ad19ed4d99d21ff052aa3154f761159d394c8d07dca0a4df3eef044fb3d85a2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "b97846b512abdaac26aa107d20dd4bfd382d6498ff3a3065452b5031fc32c0a8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "e6d035da2868f00e023bbb7bb614ec0f97ca84f7431590aad05d2156f184179c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "1fe62458bc8336860dcb06fdc69a5ecdaf98033f609ccfbac2b8966ec3db366f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "6870e95890f9041e93637400de5d3946cbdd58a0e863b4630a333eca0745778c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "acdaebef4aef2967fe2de6c61b95b8bfd0054c1f126e9eece75b8b179a5fb705"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "3db3573d22a2a9c3544dd0b13e7d4ae1cb318cffb956dc4229c7a7de63aed8e9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "ea65529074b6be5e2f937050fd88989fcd10ce3a0148ef610fa53b5b81039762"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "166e5e18c2b5d8d53061579ea8c4bbf95643db61b157452379ce70eed34f9c87"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "526a23b06dab6ae258646ab8609294da18a60cec0cda09bdace8de049b78d58a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "89bc005bf0c5260de11aaa9de7717afd2890e00907130492e232ce59e372895f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "8d5a24a707be80680d1084433c421569a6b50364d934b97434a214c2c2895900"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "8653dcc99f36b4f304ece266c11a9ec3b3adb1f472ca3c574d871ec74580ff24"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "51ca4557d1c70e1e13db97c4a89ad0a892976b5c012f2c5bade07658bfe32f81"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "ebf0a7955da21d8f0643ebed526168fa8e40136a0ff23aa7e44b7bed517c6cc2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "0ee2ee501cb909c960db7620eb646addc355e9aac6cc754f24a080ed69e37fe6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "d4dc7a6888deae7772c2a6c5f7b460131a765ee74f7be8754b2be13b37e05b27"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "4be441f4630515f99542dabfbc2b0ffac4d3307c2ca6d49a966f433c5139d466"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "18b9b4d644221d797c306a45e2bdf5b8bdcbcf40e428a70aa361e191ce949230"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "82afad39ac136262de4a208a722e0d1efebe9581989a93f06529174d4abc21b8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "c1150dc4c32e0c8ba735d7193e6a5f5dfceb0ec9cb858803bbe034071ef6cffb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "776dcebc3cf1554c9b6c8b66651ec93830546212e4e0714bf5822d20f46a4cbc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "342e64d5f4386728fe3d95e0ba1873bd55a27763deaff1d714e3b60c64089d69"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "cb3be27f4277df4fb3b434fb851f7e91a345e28980ad10d01cc9a6f77e984810"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "078322eaae240cd1c0b0ebf999f416183ecf9f7245ed9804146789c2c1f3ba94"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "5addcafbca88720e9535413c8633494c50356fdeff2fbf2f541c083be481f11c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "7a4cd48be0690ccd54fc4b3f33733edffd623d6d5b9b6caef62c1080baf21f89"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "2d9d1561324a2e06e378175f481abc2bf12fe19966e56552be138b8871244bc0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "3b0b34926e8412c24b58c473490fe0899e053bd53fec761eb085cfb9403e999e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "33d765b768353b05d469ffc7e33650914bd710a8ee8e57a87c435b9fca7fee84"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "e8efee80621b5b1540d348319ea808c3940e96a6f41b70851c5f120f5c0c0b7b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "2f8492e7f2233a7159871712604b60589b58dd97a8365312506aaa71006534c8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "bd4cc6bee86b818af523ba5d7fb839636439ea0586d28d0ed8bb0a6cb24d7d78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "7d48eaca31e2e45377ae33b7e917fb72b855a019bdbd217e90e281fa1620d383"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "883c3062f1722b6734752ac0554500dc09e8802d00b730f1f1ac188f91043721"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "a6c6f0cb41dacd34fdd0db805501f7f67246a70c994fa9790d54cd67cd677a78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "bb2131f5e3a4ee01685618b51d52b192b8cb9a0a80a2165b003126c5b6c00586"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "b0b5b6f201741afee56dac0a51259373f3614b1302ebccc7c0e1513033cdb5c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "05b2d6e3140e424ce9b567387e082f5e0a75ffc36e51aef9a1cdd368ebb31b7d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "ba79b6297acf92abcb2bb70788700245cb8bba5f4d63c990a99eb35792eb744d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "470991ed1d0c6721dcd2534d9a0ec6f899a09fd1d3c57060754655fadeff7c76"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "3e08dbc12566dd6e7637cf6661b797b3ee00bc0e67fcb49f64b9085974a5005a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "9be3f4ecdd7c9be7218cb8bfe5b156e4083baceace7c79c3c34d8bac9f13c1b5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "b933d547af632586cc2e376aa02374ad518472d0e561f7e2027dc293f1fb2b5e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "ea9507e555d9d662afb87cd99ad17396b101584aa4d63f1df1b078b07ac0718e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "e0a66a215490f092e1fc289e6a227d2e6d452559cd9d943c5f9a3961ec340709"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "98a0105f5df0021492e13a682c13c451f5a97f13d4c69e21f36911ddd9f95ffc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "ccfc49e74e86eb7b2885730c5ea7c232c9c7e032f57ab8255dc1d6e5ac17c93c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "dbe048e96a4bbcca7985e8f67d5db407f9af33c0afdc50cbf7d170314e5f1055"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "5b9fda3a9fb73dd5ef90a60cd40db2dceb2a8281552981f52b0afa12df09f641"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "4b3921102eb9e840d8c75047ba63b2a0e9dfa8c99d77a3bf8d95a888dbf7d785"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "b9a2ca69554fef2e9394f6098276de059819e94b49e908a3d2283d0cdb7585c0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "7550c1405f8ca8ce3824b1179ad1f65ce1ce3e90dd75edc18e46a4fc0ce6ed80"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "707890b81e350020043e55e1cfd809629730519415f2f0c97235e90cb1b260fd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199952, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "7712406d78f0f281327ee5ee8486da8d35b0f6fddbf3fc1647cf60cfd1dbb1b9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 199856, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "514b6d5ecf76de29aefdd55368a41024c0514683a0bcc1092e279b09b08ce30e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199952, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "c3ec66e381b7884d8bfb77ab33b189d8a5be08182cf7753252aab9c14645344a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 199856, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "189c78aa904ed6529f086e13e2bd84343b85619308c1938a1a78dfb2a8e4c132"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "e4c1eb01157ce7f2b65e18e0f7f34b871a338aac5e9b75b74f9a42f325e695f6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200752, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "f527b99286c63056e6afd27fbd2e6ee530b5a8156b9bdaecaf8979b1e0d53693"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "4a0907835c566835e6a9c863d691d653c10240db83e46cb12e3467cf56d54bc7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200752, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "07451e05336b926f324911fe998234c239a0716d9edc3c1af85b1b9a5eb6a24a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "032a21d285d390651ff1f9fba937b3cb5ddf557083691d07bed2a54761b7730c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200752, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "39ec044fb9de240cfb963f39441d185ba12faebeeec370b735827669d92aa9d9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "941c126b4e5ea7f31f5086b19c739fa93edae37afb13788059a66dc9a18b2e33"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200752, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "2288161ce35129f9aa95fcbd74a8ef46a345e1d8caabca9c1c7d75f754a450db"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 199952, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, "187c338bdf9f13e9fa536bf0ae2d5cacc03c5d566f8b9eb42592a000cab1f374"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 199856, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, "e967f606df76ddaab47d2e7343c14c7c09bf77d7c1303c978570804470210c08"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 199952, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, "982b006ca1a0b80dea811a3e0748ba7d2c69a2c400a8c5b96a38f351dc4452bc"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 199856, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, "139728d28abbb4e1b902218c8d68a06c6569eac557545dacb7f9bb0e03cb4167"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196816, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "242959dddc0b2e3957fe9a4e5a97ac9ee3170d37f1d5da582a369ff9a6fb1a8f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 209104, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "8284818251a3d1ea6ae63f666d1199428a621cd2a3aebf58c52699d156dbea03"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "5f01ca6f7c2fdd316f92a1297e9ed8dc69c27ba91ce999db192236ee2d2c3749"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182992, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "974e637b68bafe045b8c92c7758698bb5a4777cf139b70385b6a72df5f7352ff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 197328, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "6a7b6b18d4b8ed163d82f0cd207eec6b46f117ae7bfc79e39c8d0206b0fe7f63"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "8c4dc276ccfa7f5d3ada778fa781f5d4c02d8d23c832c60ba443f48229fbd1ff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "57a4d377bd5995086371b6c4efb3ed682834451b81830ec2530ffd98c97b5946"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "87b99f1c5985e1b5df15ac361d32aa270e009aeb228d91cb4c4ce21938e79af7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "8748e201917480b39af09f52d967571c7261dee49f7b8b2ff77b62887b5674a3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "0a27b9320a51706823537566bdb47e426032feb9f04d76a64c7f7ab4410f74d4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "3e281a80fa5308e634f6ee79b37f8df64458b69333c90cf4733a9d70bbc96dbe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "b763d7110ea8f36ec61a6314708900b5050d963c980952d67f25b4a706551f46"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "b5dec629f1156bbee63eb768f40806b7d854dbdfeba2564ad4530a22b0302a19"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "91cf8dfe4f4beb1337f504bbdbaafeb02f0c9e0b8281ae308b673c406d3a7788"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "52944b41ff1c552acbade356abe286a89714fe5bc7170adc66c70ceb2f2a8354"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "d59dbf456f2607ff0693fe35d828c38d651987655285e38c97d45c9ee95d40ef"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "56b69ee8969237c4d01e8bdd88a275310d9deabc6091cef1a950be38bfe41d23"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e772b4e16404eb928802a834c0c25a4b92c63abb0fb5edd430f310db0ae43d05"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "3bcf547f6b788ef8b7436514f92dd14eb98be7e5013abc61c450a3f93d4cfdff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e03d46f7b0218506757da8383b72de185668115aca295bfc6053abd9356b3f75"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196816, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e6d456eeea884f5d4b4e037a4fb42b53ce370c756023d70ac593655a86678734"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 209104, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "da38ff737ea2d972b4b80d8a7ce07f19e04a1ee94b8681fe166beb4e1962a677"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "e338170989c7bae59d8ce6dbf964171b98a9cb519da8c3b4b0090ab2d77eed10"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182992, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "3b81a9861dcdea337b5f85b279763c994e1dca79721e474a7be63c54d27d73f0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 197328, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "a969316ac5cc00957c84515740490a16c12052413bc6d6a056508546d192c3b4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "448ede3fcb769138ba8bef722ec3bc1ab60ad995e695a414267704f6711b6081"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "fbc1999cc207e5dfb49c00ffeeb4c61722c326873cbd333c7f2198f4b8ccf46e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "84132d1af261f13a594c110910f7614bdbdb7c856bc168dc440670bee50321b8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "222f9c8e1fd92afab48972909ae82f406df2f4efad5304b296a54e07cd1d56f3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "235b2bc78a1c15f236497fe993721d8d0c27432d9f6b3f5a6ff57d76d84f7021"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "674ff5ef286c3b89135b0681094108f85837ce7437401cccd3e43fa9a6c65267"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "e1795a7c0be2c7d8dd33dedc52c3205da6b424c471ec2675a10770875f0acdcd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "47f664576aaa7730051900ddaa9f6d08171285b7f6d4f557479a26b44a1defcd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "6e7d9a1aead2d4adb9c0403aa416cebdfe96c5b716c9d123549f983d1664b564"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "220c6d4ef454ca8625e34b29aa1e082c057a97a3fdcfd9cf0b52536a40cd0767"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "3178d0be7f609a9e62d87ad67f03c6f94efee9d62bf7262179a307d6f94d0c37"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1663d0c3aa4367a39ca46f823c46df49235cd47009537cac007955139f573dca"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "645420c4f650174b89d699334156ee79adaf28901123929550fe7f26092731a3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "051ae178f709252123f6883b8610367f49ab8562e82882cff55347b4d7ca95b2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "6fe971f35f9ab9dc8489bcbfd61c6e2c9ed99c35c21ae86c72e02c1c9d8e29a2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196304, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "88cfb9b8cb8cc1a1a2a71fb9fe364a39176ff5d844a0479877b1b4239683eae5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208592, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "b947584bc612281f21756673b3fd64629ad97b279d3e4f353c24f8bf3b50facd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226472, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, "d6fdd4c4020e785e5d3d08c741a244183960e6ca7660243292042ad772799e21"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "9c571f9f857558c5c35126b64f694b75425b77d7de8d22ff54dd9fd205e398c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "79de7108c17c40c79aa5d146ba49d0847ac2505bb036a0e6476faab9964a1c90"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196816, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "a903d835426ee9ea1d10ae6eda24d5f14d3a7f8ebecea2a5a99f3e9f4281f2f1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "c62b22fa8b79782e88d749b78008feaa27e71fcaa81cafebf12b2cf1516d6cdd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "041b2f38e5b7057bb70dc4c3f1df15e84e2384d43c7d9fbef9038e434d9eed0e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 32, 0, 3, 64, 0, 1, true, false, true, "e690b0ef0b1334f69c31a1265de141c7cf0dffc7d0710b85db12073e7d71ff6b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "2df939c59e4fde85170d032ba2c82d8e5b7d6311d96a46ab30cfdc15421490fd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "d7581e5a78b9f0ac9172d592bcf1c5672aaebd1a1525c156882df170a613aff6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "ce935cde21867786bb7b1addf93f1100892472ecafbccd0285afbec590bbf4de"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "96146e107e949e7b64765bad534b7a44548cfa97de9dcc9bbb54567d7ec3e95a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "bb29e68781f9fcd924828e20518fc7cad7b3fa194bb89aead246738516566a14"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "e982efeb90231907ed6fbb692ad8a3067aa02faa4f558bfdc3bffb597ca86b99"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "72fcd5965163c9bdde26ab8f5f9e80ef519b5145994bcbcffbf83e6e56e69727"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 226544, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, "67f13bf1c3c16d1cb2f58bf3944bdabdff1c9afdeab7b1c16300ebc6b1130017"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "2ac977e9f910f326f2a5e109ae0b25592480b55d7e1c83c1806450e7cfed0cdb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, "11e79b466324a76bdf8a58d3f0b7dcd30f8565bc4f045a20f6af06f27ad1de1b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "96326d409b181f033813166a68f0687631ba1f43b17addd36a3c2a8b70f15e43"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "58fd77b0d1193cfcbe531bd1b93929db3224a1eb46945d64291255ce30c33583"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "42f2d86700be1d204dd0158249efac8e2be457644b7228963f0a0bd66a180aa5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "86685c486b93210515d3688076b2a0c14a21e81ba40e24b30e3b0f0fd69df05b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "77d1ac54b46ad8b83eeec80838d7bd66b99c47977e579c2c89b876d5767282bf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196304, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "7ca9f4163715da43cf3d8b9c0a26e8ecbb1d43251376fdad36cb1b31a1116901"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208592, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "bd541396fc3593d0f4a934825f788a2391341835aa3187356de8ceb8cd9b01e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226472, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, "b63a9d9bfc040a67c71ebac77bbb7300255ba90545df1d4fa1e6d62a5e77b491"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "9c081109ea9fced7bb599f81b2f578ef6f2ba49423c442f138b64e044dfab9ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "3c0b0c069e7f52b413a87c210df09ae54fe8a2c0e9515a6fc0a1a32c6b28bfb7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196816, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "4e588d8b2bf0e442af1f3183fe8d981ddf81d75fbb16a04076326a07e834d3cf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "cf9f8dc7e2a49e8bbd4a8452f716c4d823d576eae4094fc13ee947b2a6288b26"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "35d19f54963ee679b5044dea7b31f3b0b896078bdfccfd30d9079143368b9d25"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 64, 0, 3, 64, 0, 1, true, false, true, "411c92c7a673b92ab94551bab572342150387c4be1e5f4db4fa36226c4cce2ff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "f6873550a9a5d93998a8f478ba00bf8f087c7563cabc02c718286ce818b3b7e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "950eca58afb46d04a3ea2ec04318d6edb1ac6e1146f39d6a6f2dc5b71fe7193a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "b5fbf204ddd1cfcc8dd9d3d8d80133c098f9d966e33c3df8c6fcff8a9ec4f659"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "cc664e4827832b581751dc831752adb738d91a467527998ecc843aba25b2e92b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "b51b7e142404b4d9b6ce593e91242622f68f28faf0cd1a8e00b900618e335749"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "e2959055502103ceb72e3150e6736c4f6554d50908cfccbbb445611f4ae14425"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "c6dfce8dfb7bfaec7f8c2e86ae756bdcde25f7e4facc87fc54704fdf2de1f59a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 226544, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, "0bef9a197dcae8c8f1170be32f32e8ca1dce49ed1aa9955aab6330903bd8ef08"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "a81dd956ee0672f45994d46b07bb4c42d1a9fe5630ebc07d08e89f54dfd458a4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, "9f60238b2dd8f65b2f33606043219ba9fd9e671b82b1431d6eeaa023fe83dc72"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "29f5ce2982f454d26d935c0c6170c4374fed268553c9965a2ebe93ec757d2720"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "d9c24257774f9a031477ef2e97b9d1c2b2c63ea69182133e25201b503a3d2344"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "6294cf99d84816af5d85c188486c9311c3348e38495bbe441790783805af262f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1a70c3f9d11053e258b3809dec513f43d50e1203d54852f9bfb39cd46accdc74"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "8af152990685d3fe6f997a7d6fdafa5efddc8b0007541c83e6cc774fec4313ac"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196048, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "67a78d865a817fe2171a1c53a843f946fa578f7257278b980eb254b71b3bc16f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208336, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "66c672549030132f7f2873ce5148e38dfad02a0b166a32284914800878e25509"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "13b486ed64ac288ae94fff37361de19f7109944e0def011c7d5a901101f8c322"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182224, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "9bca34a142bd4e69b0fc04970e5f04471d023f0792e8a6b3e29c44a7bffc371f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196560, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "f3c05d8fc3b1b26ad07c7d2b45789c184864590878699d26f04cd99bc766b9b0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "98d582c56e87b4b25914ef8a3f6b88fab218b809e7c297beee55b0ffa0faef50"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "80257879dcbe20879fc40705e676a9b3aaf459d7f22cc56e0c3aec2f43b04143"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "45d4e10ccc248558ae84530d9bfb8a760e765c9c339aed889a3641a8f0440989"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "a39b5342a2914edeb55b05ebbd07472f98aa80e5165b247736bb167f08ff55b3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "71a853a70a575793db473bbb79f627a83bb8c93f26725b1b782e1d41e36d107e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "e7cc0aa0232a628b3251f83c10a92c658b6798eb4ec8495e012ed0ad28c817e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "97ac6e7e00818820d5e53c7953c77d0b3452a76f79bc270fb15b7336c15fdc2c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "dc778bca8ac86114fd41f125d3336a2e56908c5553389be4b9573b4d434a63b4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "aa74f10deacb0d87d06201605f52b91bdf0958eb5ef5a2136672412ce0912560"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "403d8875a2861fb4200bd415e7db71ac0015dc5a334a03b22ffe96c3275b6de0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "5fbd2f97a6ea0ff28dc5d4962cf3a6226bfb4f225fad83940da0d588a1b62733"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "a4571b866969c4b60be7c58c337e479ea17c73f96a22edc6a7c90d646cc97692"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "6b2276df8628375a5a7f9968b138cc67944cab7eb7fe098c3aaca96a4806dcdb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "ab6b9feaa89dd5c1d199a480603e2ae94a575ed53fb3ed4912b7c8169beb9fe6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "11a6aeac14f1250b7ce16e0344acf492c011b30b349e5e3debd9f92f6ac16607"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196048, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "b9e18cbcf87063785f8c5600076096221f537a4bc8803d416bcd23b6be007fe5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208336, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "70df3259fff8266c6137ba42a61ac41948baa768a96d12f08f81a403940280f1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "523ed8d066d7b44c6782552a51e882a3a668da575254d200de51153ac43bb33d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182224, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "5e95a57d1cb28e57e6d21eb34d7948a9c7c6cd0aa4b54e127179c3956cc1a356"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196560, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "59eb76d9f1c45483aa0f1edf34c16cef8ee1864322e60b520cc938688b957c34"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "b31ad7b202a7fcb43fb518d00f994c367352f521cb68e51203ecd645a8786eda"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "d01fdd9fd5c1d7141cfae0d6415baf803f9d78c622b6b9bc96e3f32bf6f80331"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "273c6204f9fbed0b8d3821481442fab94cd0836671f1c11080c6650aa729dc81"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "b3c1d5558c31580e04869738c6b2b5fe159ae71dec0bda1cf9c8bd2486e661c7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "6324f025a5000b66577cf1cc973e922af10de51be6fa6136d467186bd4489c5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "47b4ed0e1ac55bffb948fbe2677dcf6a0974287e18fc9365d53707d0489cee1b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "f670c6939fb66bb7528abb5850242fa7a67cc8fec29ed158bb19c5fa9494f779"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "3a1aa73c9b8f0287017affb59ad27fce3e519f34bed82a79f1dfa17163786951"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "6df38c46af159b130f4e8a340947dca691127a5a9d96f44570e564e5cc1d8589"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "a17a9b753d835fab9dc5f11b5d24deff104b4fb57a905f0938587269280079bd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "c3e6e40f2d94f5ff014825ca1bfcaf19bc0957fdc73376b2eed55e2ad89ef23c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "eabfc459eec22f1a2c4e2601a12d0ebc0f8ce6ff07e0ea96698a6b979f24cfbf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "932c8bd030094c8f64ad6501e442c60739a01a2947ef89e6a7ca69fab07dac4d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1775603c7f1edc0a77277c35a9cb483f9e3518de2d16f57f526189b1a662e808"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "ada6440a16441c2186158878a8f9a4f5daefe423543784032de09c717288508d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "1c8e9f6c6de00d15f1c810b8291ae73632d021b6e174d97cddf992f48b719931"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "9479565e888fbe3f6ead0a2d7f5a9a62bfa1eb35cafb3224493c4c1144b7eb96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "b46cbf5754176abeac07c1734236df82e55b080efcec41b0f53ebe3a02887942"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "a25472dc8552ba831f2d312472293ceac7d5598398740a34a9e1adf74be54893"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "692df0ff5f69094c2431f23a4a677c4306f7dcce17499d2c71598e28dabfafca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "13725ec526c7094ef390ad0af0deb1f51ac046d1d53672b61b3baa2b2af388bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "c148a31a7b613bf60b93283c5892845c2e14c6af120d6ad68714c8650be99d04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "12fb5c247c593c1d941879bf9f515db1dd0d2eadec5578ca4dfe05a8564119fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "74ebbb82eb4769259e48d28eb0066131142dc574372906ea8bc173abeed281b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "b0d166389e0cbf83119d78f514e081fdc3da87f2df623e705fc3ed6c20bb3983"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "98119492f347efbbcd1ea7102951b8205b6a03247b7317e168b5d911b59aabf8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "b9eadeac761c6f0ea14c03219d534603764765c70fe0075e8e738df787c60ad5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "1dce7949252e4c720ad12a79861fa595957c73cbddabe8d1ae762ed59273c8bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "576cc7124e36cf535831d59d1dfb9aeea1c5f0ed73e01c4db8c8cc2ef01088c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "1b66166fc1f9f8f57918113180e4b287df9caf3107830a5d18d3686247cb95f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "b8e2af9701f14904ea08d61a4105e1bb58486eda992eb6a9af579281cd10acb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "fafd2b3574526f19a564bce07b3d89a50c03a0d6ec375bb62422891fe0372709"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "a64fba289b2094d1e91edb1b9ce93d49eafe6e751698d185cb54c61a75cf686d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "08d6bbc1edac2963093f1a41fce60a716f8dbd7392728b344c33745de4dbbbd6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "421a853dbf30ee8055fa2efd36643f177642499812dad25cbf0a1e28e7f8aabe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "2dac0c915652ac0833986a9b9eb8c3c0624bae6dd7f42ade4b33aa4d99691c4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "160bafa0681599eff36f81274fb3aba02216d52ede90ee0eac36a857c8006854"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "8b6141c2dc326c9cdb0345c1901d468d18a7985ca63980c08454206c52dbd860"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "c5cc0e0b6567e6af17f8630a8da28460dbc64a422bdb17c83742889e6239c52c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "e0469de457582a4aa74e0df203553f04f8a92d8940aa8a27e00266ea6659d01f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c582c82072df129ce16d8b9709c06cd231bdfc348605822c7e53b66a22cfe376"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "fc9cb9cd458f5ddce7753e5b9f8620a89cacdf7bc49d3e061fde001665549fa3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "4a333e5225b6f53392271ce71f91436b1ccc311adb992d45d19198e87faaacb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "17cab6fe7c52e5c1e80ab4bf18d2c7db17dff138b79c4a24573c4e04f3466972"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "ff914dcc00f4dc1d0b67d63df015c922888644610e90b5f4a7e8074ce51c89b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "8577177cdeeab589443067047ef85135d00ff162bd10a9cc1778f706084a7e63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "172c7954d5cf33ea78f84d2b29b70e0c4ccde3ae0a520c94da9525cc1453377f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "254d774a278d34fe4488d50cfce84347cb9df902d80d1e780813f01cf83a83e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "0b5180aca33169d0c5896fbbcf33cd4753f310752cb3cee9bf701cef31815a04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "e2cdc3af1e936cd019a4eb0632c8af5f2b9df4b51e746e9a85bdb1c43ff5128d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "2f81b4055cc82302f368c6f1e69702c7ed4e8addaaf52e494437497c6d3b58ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "b11d435e3e79598b3f6b3d08cf600bc43f7b38a93fa807f44139f63d4482e62f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "cfcf823e437f30bdd5e5ef12d284feec56d698fed0f6c6a538920281c96dcd38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "73d1e21f708eb8802704a275fb051ceacc179156a605ef87bd3a628cd17cfcff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "a4628d1707170a31f0b64a0327c0d4af14ad6244c4b79d1ee5acf0bbf8ebd795"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "f43fe0c1e73beae3c5675813ae44ac1c4e1d5c4ca6a4c274bce8d22a6981aee8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "d268a6394da648089c97ec02b456fd5a32885da3113385f5abdeadce6c0a3958"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "bdb9f796e28c2c71e6fc1780521681521245fa9fba907be34eafa0d2576f44c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "2f9288015ec825c0f486d75f81300f80c40157be2493ff8887ca62ec6a0c4d11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "f289cbde96f2c1278ccba615f58220b2afd1e45d90be11612466b67f9d473c1f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "0d050ecbb0f096d52624e244795b339e1e88d4cb0a613aad9527be079b915a57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "7c50fa16a98df245364c6b5aecbe35916e12c6289c5f0f6a6580831cf3d31a62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "51be7d2040d6ed5641efbdd845793436f43f915981f98e78019cf0d1adc116e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "cb49f4925eeb49031a57b89897c9e7641f9367d06733641a4877acca558c79b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "4392364f7ebba587e88b23ee889aeaa3bdf6a264397efd52562ae38a4f4d45b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "0a4d1b2240c5df9271d2f9734c0275991ce933dd5684bbb493415272ee9b34d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "8908b551edab6b66fee032b7db53e276fe378dea51a1f70275d599a0600d3672"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "c2a6ccc4a9b2daf910946bb081cf8368b9a07a3a3a398dae610baf99ccf9ad15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "1fd75bddcb9a8f7215a8a2bf27fc2909f38604fbe6cd176acde99d527382826b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "8fe142809684a635a1eae6d24e188969ace8dcdb3757988f889617c929db7b9d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "7923f3c2c2c3e94136d797ea43aa8d2904b6f7c64c4edc07a7dfc7653d6339ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "703fbc75b35eff1c099bba0ae6f2f2708a7d5dac0b1994b01b166cbc0ca01eaa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "211f8a76ff56b6e8ffb3488136d799a3a497d1ae4b183c021857973a935fd8ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "a2d07c0b320bcdecd370a80487f35cc446fe0bbaecc3de094c66eac0d05680c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "0f92b2db06bb066638b536faf3d590b6bf3ebf19c7d3a73ea9ab7db415757078"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "d93dd0be78c624e6bf3741283dfbb9c0da689b7eb76b43fb5e8811c0b93aa90e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "2aee842fca4e38c88b3b6475ecc19ba3936edb844b30d734d475f0c7ff86c475"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "30ffff602f673df6650cf2cc9f1eeeaae17fc35361899dcb0f49c0f35eb9d3ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "d396235adfddd7e51a6bd90280d709c4ed4e15f57bbc8859a212a84b408eb8e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "3091f57a01013b407d54012ce9c5de5ef2fef9950d09dc457a02f59fd615d88a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "feebb0f3214f54aad39ac00d9dc82c3314daecaafe9bea5ca8df372a088a011a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "811a4ad99e0e5cc1a8d97ac88b67c00f2dadd4f939e9c180f921c5e400704c57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "d793f213f926b0654fdc983236b3a157f1628fbd67b1ba3e5f49398913d65780"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "92c2316f4a699099b5ad32baf954af89437731cfec8f5fb9326a23d164d3794e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "409b5506c3f9eb6bd4cfe091708232d1eb60f03123a5daf939cc5026a10900e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "91aaa19677a5d161f28b2de861b2f6314723485e851b6b6eab6f1a17bec102a4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "e61e1b3eb5012ca28f33b9101b0ff18bca4d65baaa9656ca99f96cf5686a826e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "bdf66f35d5deb73aa51962e55c3c5f0207ef71b5834a8632df1c3cc69f33045f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "9b9805da6a5f1f75dd619698f0d873bc204bc65dbb7ee8e898ba39cecc1a593f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "03f4d07e04657c0a2c0a9e8b245911df9d6bf78431c7c7e3ff4a84ecedbd350e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "a70308b81a9148ea13f422124476cbf56f581ab36b6a9eece2b111f0d6d73e86"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "b449daf063494dfa6e661942fa58349d5adff5a712fa7772c1c240a8067a03de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "8fcc99b89dacd8712efc657bd986a8391bccc2ba550643ccca19a7ec7b1e5832"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "9ad8083df0c250e5ba96649090af40ccc012a5bfc670c653ffdeda4c23e75d48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "026f68ebd72b8cc596f4281356109bcb2ae7453bb86e5eea1e340325fb8d013c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "bdb4053273e058e6345312f538808a81ea5438df78939eb59dfc64c6b7aa7b08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "9f883839d3b0a48dd68b85c0783e55670e1fd7ee4f2928177b139fc233088d7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "65f259fc5a7c5ed964b22367803e7fac1a7921ced4f70438dfa284d135c18d8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "0c99fb381b62d76bb8b9e1aa4e94aec14385dc9e61c6477a81c1b9fd24ce3814"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "e93e744a251c5db4a334729d3ad8fa11fc328fab79b00617a7727c072de17bd2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "ea86a77ccced8a57e2ca954aab7e4d5089fcfc5ee548517c6612da4267827eb9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "e716a4f913e6eaf64555c56e9075394aa1c76c6d2c06a7546b8a677cf7363cad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "7192d1727409c62106f24284c9d5636b66e6efab608bc91b7f9a7f2d609e4dee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "3f23fbe31232a2077d3709225258326b49a4dff51486bf3b99f68717650b3852"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "ee94a3b0d22601b97ad919f43ab38324b8419f7dcec60edce158d5187ec60fdd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "bd7c316dd3e8be07de209526903ff3edd56201a896c46bdd7231c9ebfe705d21"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "cbcb09cabd8fbaedc93a2e467a69244a5af6b29e441f535eac39d893ae79ebad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "2d6223cdfb8db21b011216964db373e27f69c3c3b538385554c9052ff34da9db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "4bf90fa2a7781dd800c1573b6907c41e1d15dcad4da7a4d25d90f7289299cf80"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "02298d6bf89d22c024c4a124eace6a647280007b6070b3577eb4635829be489e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "03ce06539a26ca3f42c8a98ce17ad140dac926b1a263bd0f4794a1be09056f3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "94cef620073628b5d39cd59dce857a846b0e35e054163f2f3dede099f2ea38b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "4d5f4804d97278c88bfa37449f75d22446a5e08a19bba785770b4eae4a7c4915"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "5f01a21e5443b62beb5625354d0239354530d2f1f34d55a0f90660146b34cc4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "5869280ec91ca28215ddfbbcab5aae152aed264dc569a42608f7c4a708aaac57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "6ae0c450b11306e8994a6937a3ab8b9c28d846b69d54cd1f7217882c4dd4062d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "4b801cd4fd6945f0f5fb4c9232508e252bbcb67e18f4ee1d4dc9e3554fb7213e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "e47a40ac796bf8add8b6fae9401b2c338bcaa532296239898d9317c67b43b062"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "773f8d388144f3a9fe1bcb9b2c5a75b6d41985f85012a20ab8fc15a89a8574d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "521165088bf82925ede0b45ea0b88f7667b04d3aa6f950af488ca26fe9cb6246"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "a215a98cdaaf434fa38e857ff5c67aefd59a105e4adb1a1b66be295b1460672e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "51789a5c52fcb050bafb698d98ce5303f7dc24ea5e7b19263b44baf8f415a502"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "c15347662688c86c93d00b20a42150a9f50c5179498f39cbacd9e3d74772268d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "3540ccd512e7d6a22ea67160e3d86a6c7451315e2f268febe5e45b9a98e7b645"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "3d6d1bd4b2f0b3d0a6ee727054d6b9deca89989f05b4cde81269c741a4af0403"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "cbd7d2b39c5693a59e28c8d0d34c29c037d609bc201c03373d034307a4b66856"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "c6f35ed328263be87e8c997b17778b45927b04297bff331a2b1f713045e49b4d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "a01a6c2ee7d4c9ae589c8169b455ceda18cc96fee876eeb06ba5d687b3bda6c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "9c4aafa36fb462aee80d6bbc74a0cb5fae0ff416505a5a17fb8e3fe7e808f0ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "1c24ab914ce0d4a8835f7a025cac80e9e08df27c22d445ab3ab641ab675843a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "ff17efb497f22596170307a777371ea46a6e3491b61fbb37b5507a00c2e32d55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "762a71c5f0cff4cc0accdcd9ff0e692bc6706ed85ff7ad5f1c178f77c53c2f00"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "9a602244af4f75e5e513aaa377a7259df46078f40462eca631d53dc7d28b9628"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "4ed448dd49c981b832c63c79bd94e2b65693fa2494bdb19bf7897831cda013f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "8b92a70dc26fc6283afd8b128321657488d5a96818d679f3c134e2dca86814ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "74180fd23b64d24774d9fb81e67f33f01060398cffa59dcf2b3fd4ce17dcda29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "20232505c1fc142bd55b8ccd46452a6a3aa0733ea13bd06b788f235cadc032ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "04e9ed738c84bd24d06b558497619fb1fda427b943a9a01ae0d71574a71b7e49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "665c14997ec6df0be8b25655b2096d5509d68a19f339200bc333476265594e26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "5c3f3d645eef456bb3a10fb1ad7379c4fbb943245ac712ae0900dc507cb00d1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "48ec8873abd823526b5a9201dced4cd74e8e2a001c1c1a825b8266fdfff49c9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "523b435554070441f695ae8c7b861cbe6f746c6e6e53e17cde6d439702267866"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "bdab0ad34a0559d08c045f5c9fab827bc5fe25c8d85ab5c682fc98bb647a4d9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "2f16d04c4ba59648af19692a362902e4c0149ebd49dbc3c2f36b2e14326eade8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "52604081f9f2feabee7c5e41d7c74d152f653ff96428453fc181649ac6329c3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "d0948575780f50eb8d82c82c8a5b1a2eb8457ad3adb2d35da58d1edc2d7ca5d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "f2cd6fbf81014558e9e5be63c0e70deeda378e4add28366d39eadd647489abb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "5646b98c0ceb24b357f032de099f1f79c6fac040bbfc9148c24a090779d0bd08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "43b5d5cd9cf86e4986c915909f120dffa5670b24654d4d499caae42d9c8a4702"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "2ab038b94d616c2fff0e61d412e1c04cf2f09967136e3035f11c19384a97518d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "4f1caee6787612d0a347e2af6854c30738c32eb349acb3422cdbb462df4a54f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "d8dbf6088da88a9220c05dc25f825e8555d500d4f3ae1d3a06f0424c34457f3f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "3f0cafa92e8dbda7c8077a34e6e046fe6966bf38cbee403c5c2eb9882515fac8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "bafda4d2e0dbb78409adf625822ba721f93a8edb2c1c8bb3aa3fd84c49b81bf6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "4f25506ae161c959b5c2b1fa6f2b755ea745ec354406928e35812b189a60b929"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "05832d7876b66d5d8abfcc3f054e55c76d697480aacfb18cd6862ef1d07925ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "050af7bc7b4150cc9fb4a75ab4d7a3eda2061398831122d2dae99245a61990c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "bfd49c05fdebf4ce8a01dc3edbb411c7bc84a717f2dc412bba1272341ffa2069"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "ba898fb060a6a15c9a1094992c92af1e10001d78dd7e0459810c3cdbb49d2fc6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "9d82fdc98d9570ab6fb381cd620bda93c01da241e74edc61611cf65cce581602"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "8eea61f80d41c11e54ef0a46f6e4c40b6e2b4cdb5b78fea517102f09d42d1938"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "88b5c27ce207d41b00211ba7b5686ca36885b01ff31bdf256b498c79d4eff27f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "da81af4ffeeec2f1f9a8941b5d1d45098094d7408bd3a9cb7510a5a328f563a4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "6d147d1d8a940c1bb7f05af24d2bf137bbdf003783218e4e3bd7c733f466b7ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "429bf38e85d70345243c29211dfe4684b07b87bed6ab7f2c0aadf5d66a96bcfd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "2d9a6954b541643709a3ac7de62c66f72a92f79b236bcded0de69bfc3b3e1253"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "1a4dba8f7abd2d922ff049abf25c979661770ab869d1fd1fda50f52f4c375379"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "c6af94e196248db8b0f9dae6d516b70495b52f0360c20315c627349ddee03bd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "a694c723aee345826057a39a446312bd67f48209e0618266358e3633c3313178"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "3509537ab39fc0490a1dee76bbcfa5b992735fb7e915bae0d4728cea0e2b2a9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "17a385f4f534bf5a63feecdb4be3f378569bb572c742629b0bc9001ef70d1ccf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "612775a154ab93eb733b4fe457d70ac903007e3b71ce09cd83526575f226fee3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "4027a2b3f5b87069bb2fb2996949d2ad4971b7d610c899736a35d77a4fb40e49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "26bd4aad50510b4a8c00745639df918afce032a1412a981c2eda97cc4923b11f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "1e08a21d032d41580897841df0652c50fb2d995489cdcd31d5ac65210ac5f2f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "d313adfb5435dd940797f02692d1fb8b40887bb4826ef2d0680f1fa81a416806"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "82fdba4b7615be73967fea00d200629f71df554cdb71826d71c058f6de146e79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "00a95de7a7af27c6dba6f5575f1c521e30dcf4f2b2e10e9222990473e6f07514"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "c059e0a413a3a4c841b6a2750315c2069ad3802ea99d8eeb747d829bdc24a3ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "3d6b362f151120f5883655e8e2cfbd9d8aebce7a41f7ba464a4e53cef46dfe8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "af0972570d1aa29c1dd615ca941cdfa1a65d7d19f61d2da2ce19d2be0aaf76e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 118000, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "cb9c79621eeb9eb005e187fa29a2d7da0f88d524bbf25cffe70366c48a0444cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 117904, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "f5ec8bac85de51419cc8cd27a8690881e50fd22ca3351a1d3ab867c41f0db7cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 118000, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "befbd97a970aa7ef1fd0eefbcb1f5e31839f3d640ab8283e5244d2d068198d71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 117904, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "d42507a06e860b553c36a0d32a39862c3281cf8753977d565547a5d805048ab2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "0dd1af02cd6da93f5770f6485e6fabf5394397c9364a5b59aa0528e0d6a89a95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 118800, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "6f49977069354f5f913fef21f293706b30770ab10f3cdccc92e2fc7dfa477103"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "a6f850b4dc2319a562196799b3867a23e757daa308d3af2718da3b836e07946b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 118800, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "a323f41c61325ea4178bad3622e620be99c7f4b4b4d594c498367aa40a7e46b2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "e1242a32e104f845edb26bda5fdbdd5b239996712ee92d82555696b985165ac6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 118800, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "a5a2840eb45a32731535326fb671d454945f5ed2f7be84944c31616843bdb757"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "e73d186c72c8e6052cc4da9d9f715e706bd319b36fdfeb06350a106272f37bfe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 118800, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "64bafc6b7111468020aae7ce29fc40c313be754b01ee189791a9193bc6fb0335"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 118000, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, "11256220e7e8c46b4f5811e6ded61849cb4e4b0b36ec52ff0ffde22f1e09e47e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 117904, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, "0f81b7df7bbc3029141a4488da3e5a01ccf50fb17adc7a6a0c885b8ef1bf075c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 118000, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, "dbf18edfd1c1f1f74a7ca57e01b8707d4246ee6b3cca93f5b141c470586715b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 117904, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, "11771178cf41e11fe6477beb07d1d16f025603f967ec2daf34053781d48266ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200912, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "82abde5cf8377f79867d0237a08320b9bb5e6e84ec138c8146fddd87e6e21432"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200912, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "dd456792d7555ae48dde1b0db334d1d92a014b336a71edf5d14d3017b59b22b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "76b96bf1869e2e0e3bb2ab489c68deb0897bd14d7360c3006a96fc30e3a73ea7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 193232, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "707dae55eece6a024ee8803474bfa0ec0e6f075a43290a41ff1df35052e280d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193232, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "b0a91600bc3ab1a80de1967c1d72b068d9ca661d248e98e17c4b418837eff089"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "0d6466b733985548945767a8338864d2c730893fc2fcd96164a9cf3f15ef7988"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "f563585cd817783b8f4981b140f3f3e92c163b6c17ad5884cfcb2cce4436fce4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "ea81e33497c026470e65344853158af4ef86e9d465684a198a19ad08ae97be33"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "55d750acfa394c80a0da3cc143712d3ffade8953b87768ec1de1f20f80b90746"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "5e204e388a49d0a49337e9452e92b4d2af38f1e78030f2881ddae0917103f753"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "4b787387e31c482822dda8b737027080eff29e075f135bccf31e7ea1fd177c27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "9cd193f2f68892dad277c233977d91dc209028ebdca7e60af54b7ecfd55858d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "b55ccf52fde34a55e934a5eb74ddb9cb4c9a145a91ad0eb280fb733606bf5832"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7d43ddfff228f3623ce9bc74a1307d65cc6c8da7ab76a18a4ea421a16bb60d50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "8e6967f692ff3f40e00c7a2729dc4e243e6ce56fccb18c3429e09464312d5462"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "591b5a419daf75d4c3b90a1e6223049f752c21986b52d7eb84ec34b004146e98"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "4f92367785e4fc38f98ebb27e8c6333211bacbfe271118079d0fdb1b1746d0e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "4a7ba507048a51bfbb3d143054efeb23caeefd5ce83161944e8d1747d3780ab0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "602dcc3a57dc3301b8d0f738c4286c5e48853fdcfdd6ca855842abfeea99f1a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "8d0bf7997a17d695538b564a46e82a0f2a64aff6deadb6e5064744ae154a3fe0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200912, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "1e2d25fb335b672c1d3e1e79e6dd90f34ed5a3575d4d1269165576b5a395e5be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200912, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "20f4a15b34e4dd88f35279eb51d44652bc49d49c749f30d7109497bb011ede20"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "3d624a42b02fff58867f5604fd4e020bb65fadce7f64827ce47395f1035bd537"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 193232, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "a52a169d6eefa72468e37174c988218b0e137fe767a5580cbcdff66c8eb6e721"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193232, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "0bd15d06276dd6b6790cb9fcd189d245e0184ffe528bb07d8363abf921b8d08d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "5a5ba62633c803c9a2017e9f8e1a131084166b7a29399260b8455603fe582743"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "0864191e816329d15f4f6f6931eef6d85171a17245e5f0312de736c97ac1b916"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "714dff4cd6dc5cd9d7891e0e5775a028dbde41dbab32e448a9fe9c76421e088d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "8f7af8ab78cbef2c0877b4f8853905b9dc124376d5c6769c2ad54029fa6fe21a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "c459da4a8595d1273e47d73074c3ca97a1ae44e9179f35ccb9a215ec49ff8031"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "aec3ca98317e866760e35dd8554c268cce007839990507a90448426be91bfa0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "c9c3420f840be2b673cbd36e4f05a673edebdadcef8a21abba84b4c1c06ff6a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ae981570627b51c024d9f242497538f746281ec88ba720e5941ad96757a80102"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "e0840c8de67e904f6746fcd8c9de6f55a8741ed345cc28d20a9e6bea62d77131"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "4d2fc987deb2ffdaad33bd2935df5054a5beab023487d1e2d079c3f1f54831c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "f12c937dde08cec592c1d3fd2e5cd5a21485612a4e85f154e4d0732fdc613955"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "07d3024ff2736eac7567a1baab558c5fc63b4a13fbbef557a110a52a2cd3c3e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "ba31b91f628095cefae80839dba5adbda8656d7376f8e249e653069d785c4648"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "c6dbc1b2e206fdc3c64867e0ce944af2a2e27d3e562d8dfdd9ee169c642920fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "8440c3f47fcf3334e6676d332adedf514385597280c521ebff5cee59cea75299"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200400, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "ddce96c2ffd90d526cb1f455048a3557bcf3b89948e152570c0f65ff6d6e4cc0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200400, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "c9757e59382c9ab328f1fb884ac0cca627afbfc9608c31a40451d9f45e17d689"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218280, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, "6e92d81431f96ea0ea995fbeae5dc0297efec543bf43e644d1226392377b530e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "68fd25d1004e892a5d5521dd21a87f5f7627f1fe20673b21b29ced1c0f5639f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "e0c3cc3c2baa5939b348685ddfbd00bae4167ab0613a2a381b5ea419901fe49f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "a3edded2f01d7222aa3ee8408494824c5c0cfaf890a53e8a323d946b03fff671"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "ad7df064dfc4d3e31cc4da36f8a3d7dff171ed077ba9a64c9eb61235a8780ed0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "c7bc603e46a3f21c4e18ac2bcb7fbda40e444e21a23b4c022ea3ae221f38037e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 32, 0, 3, 64, 0, 1, true, false, true, "904bececd37bb54b6f92a816fb197d390a6ac5876aba628a943fa7afd872fa30"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "dbca61c57e9b9676825e9be74b372837a821c883d1c101739bad693fdc88dd3c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "3813b1998fead637611229fd33fd704692986920536a6f9fe1a6e8fa18436ef0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "1e1c0f1387cecb316ca2fcf1e2c102c2ae16670602fd31d00f3e7f6142914b18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "7651d17a49e3870649aad9959d3d9a44ffb11ff880d9fada2432dbed7fe5a98a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c92625bdea3d042b2835319961f4cb241fce064abe0bd70dc8b6d3da8bcf607b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "2627fd392f800175d0f14e64c247a7a6b59a8b1b22efe3b50b4afd73cccbca8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "0accc8907c4747bfe854d811be6cd68ddbd64033a836ddc97582224eef0c8c67"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 218352, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, "385e70b16efe53934c392ea27017dbe5b154ee6097a27745918b44aec16d7b48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "48fb446a7d58a78a43da7825e26d0a58e6c97bbd44b103fa44b04971e0ec9fc7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, "e71d49fda7daa9bc67a4776006ced085983ef612ed5bf917cb3b194a0f9dfd97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "3e3ee6799852624c68c7e7f19cd601831176a5125b1da6ddafac1f97a8b0f5a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "5ce073a2f7eaf9e04a4d1634ac2c718cb1d649530320e81f626776ffdd4b04db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "c9d76e1ea425a3f2b92d04ce3abe427468494f8f1ee86f911a591652405864e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "daeb3bf8eebf116eefa0f12a01819c42986b030c316d979e10990d713a96b0c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "357d211671b691c9597486af6226e9e6604500b99814cab1def9d9373f849653"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200400, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "063c734dcb228ad5dd56734c0af0a61b4b301e5e989eaa3078d40553ccd60f60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200400, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "9ecdd3e9346185b60ced3fd761e19362a8282d32bf93552ae0d84da8a3420d33"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218280, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, "a05575f656bad04e8617e84cca036d4492a7c3d70c9de9aafd16d39ae754d3ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "5d15560960c99932fffaf1174c83740c09dcbbc3b5ce4a8ee7afdefa5a9633b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "85a2083d901585c208852fba2c8b2040c1cf5e14950d10a3d148da1440b89306"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "66007a4915996468b01925e3842dfa0ac4024c311749715a7e80819d4c0da846"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "d82dbda5cb11072e415195be1ea48569ecf2f9a6bd10eb763c503f5b76a5c07b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "671fc413dba23b381781cc7cb3cd5dceb970aebbd512b7ce0044eeec2df4a771"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 64, 0, 3, 64, 0, 1, true, false, true, "745fa38184234bd010e5f268ab9cd519f441031df05fef0237392008f6c9098a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "ba111f8a3dce26cbe676aa3dfc402a11c2976a62f92a546526af91206c83ee0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "3fada15c77b6ebb5dd173f1fb97115ed05904469607eca25f80c193758d9ca63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "4aee95728216b90cac9bcbc77aa9740683ce291875af65518ac867e5fec0cfba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "97e7fa2733fd9a0b8e5010ecf47f4036983de3977a3abea1e7356081a9d2e0be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "50b712defee42b5c0119f78869f94c49953adc3efb568ca7d5f92c16ab651611"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "0e7c3c7f8f146206d98cdb35036df68ba7c1ad8c58fbd7b7f768c5468a88d357"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "d9caa194e20da47c47c9b7757d7baa9f268c07c0a829ae371f0a61e11a4dc1d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 218352, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, "80a1977882b5853dd83bce797cfab5cdd29ef45d054cf39d5f897bd4fcf4a636"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "9efe158ec307dc6bd9acfe4687a41620b2e3078552155887e34e15d858b86870"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, "823daf13f0b2a921171df85e931d80a5b2ddd6859e78c84c0b7224ded88729d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "162ec61f74f2877631ecf2018c4de60f71c88af0a0c3f339865ff27cf2c43aad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "a8e8411ba222ba7455da60ae004286f408c5565af15700f9f239b56712a9b077"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "2a77f4f496bcdf7c9feff4af324ead661115a6b3c986f75cfe6b008f42fb5018"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "128d9388da04bc9727f01bc2183ebd3d2429959ed1b1a4c6bf1fafee4785eecb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "87370052042c1d78c51f718d9dfc9c6da122ca2144f292a4e3175a07300278c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200144, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "9c82134f9355c54c342ba021672135e7315e082fa49f803433b0ff4dc91d3391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200144, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "4538c19675b0ddc5563f8eceb721702db42fd78aa87a6eb27ff5a08634f06c95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "93544c2038844992c2a195183292e37772490107be41d1fc725c541dae9c25b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192464, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "3067ad6761e706450be7f221b3b24fa30f1a9de8949561f7b86ee3c683b40474"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192464, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "1e845e198376576741c18d3cfbb00ba516b51467358ebcb661373a89ee8b9e8d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "dc0ce01c59f771b0c247810c01dab587a4d97cc7e541c31bc8ad4f40083c29a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "fed810327246d0de6500db93c745d3720d4a134469989bc2ec1805f21823f3f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "4a27d11775f3332e3f2c4d2d98990fa137d1dd81ed821e53d3a26de295111873"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "25afefa7a958badd0184aacb7a083fda870ae248724965bfb8e14a1c16a17a03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "589bc4772a54bf21d0f26bae7ac6690cb749fac9fd49e7590b3c6c28dfbb57e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "aff72cef74a9eacc6cb40e66e14c946615ef5c70db5f46111c25ce9f58fd1724"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "60caebc9d7d2af7efab793810a81880ca2de272ecfc4ef40a76ef937f2c8b014"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "8e95020fe2d92725603e5589acebb0d01948ce7d144b7b274402f8d334d09db1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "1011a71cdad394ae0aeac5a4eb785cca8de10fe05c4084783c69576234af626f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "de2721a94041e0b6a8fa7e3c8b04adb089b30e8f10c13ebe2b40a0d1b761771f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "f29ea667fb44ef03e791c47ee61361c455fbc20395e131f062dead575f14ccb9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "cea39d9f27a22e6b0db4d33a440c5ed0ab837c4870613c9f3b7f5fb9f0925384"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "32dfbd34b3e21f204142757d7a64b1133bc8de3d1718d0b71e523ed3a4661e38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "fc91ff4a7589a4fea421d8bd97a841bbd557ea8da5cbc22367f1b45e5f25cb79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e8e96f9be058c9a0ededebe9917e28c594fbe2eb9287a5c3cf45892bef72ada7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200144, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "789ecd4e9385284a5bbd97ce0d5b9ecfbb1adeb28df689bdf88719d5edfc5031"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200144, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "9440a4430681b385a7a9d57cb2f33e78573812c66383d80d94cf8ad45450b010"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "13022179d3b7a660176d6dcde77bf1e3a210dff6bfe7fe1c129225eb2e6f7026"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192464, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "c5f274da546dfd9e781c161df2c3fe82ea0b3acbf8f9a18f39675949426ec789"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192464, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "551c1e39423cf5d4b33c5632cef8ce105876718a835c6570098a3feb8abed550"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "02b06c19aeeac57cc2444840a901b558c1d80264e00cb19db1a0ea2157591da2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "3ce54910b6f90559c4faa1d8ed7d31c6ea29cbe2beaf1fb9a4a47335eed4ec10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "69275480a0a6202f1c060c28139e6a2c381a0530fd08c454d1f5050861983ec3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "97e83b5134a86ce9f49f0ba956a926912af53856585875d39c19174fdb63f760"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "39860c3c3a7d43c7cd2b4e4262cb49aa0c0d1ec7b56d30214b5ca2a32305ba2e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "8ac607b3e093fdc950182650e2ace955e980b8ebd60ff1c5c9fe0f87b5cce9b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "764057688c53dbbfdfd4f3c3c2aede36e322dcdd6720ed2288d1406b7ab08dd7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "defa0a10dbb476f10caa464c2eb265a269e40148954252547665904b20e528a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "8ffd4c0a7b7598a8a93f3bd15bd92741d1ef8caf5b192b492fba1f2e22812333"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "02d0c56348e208e543bc71c9ff2635ed8c22b0cfe4d8892be7bd3e12ab8c7a0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "5216555269efae21d3c59f602cbde63b4436ee98514aafd9a7199ded333413cf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "39a29e7b2795c59757a2192d4f519b41b4da272b74d30de0fb29e3cf95c4bf42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "436bdf1a33ee322cf019deeb6abb64c994d09ff28dd1437f86eeed84c08d53eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "d9a434b52b0437b23fca1023bc22fd3df9c9546bd5174fa97df8bc7914f2bddc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "afd9058843a0e203d394a73d7a76f625a5bac7c00d48ab234d9e61ab2def27cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4cef98ed0018dc8d7b0ba2096648899858da24086f410f6eb6a030950da16c7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "0b5e1d69211c9ebd97d1ef1ae9f7efd1ca4e367f0008178ac2d6ec49b64924b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "cd943c68cf347e8b70c4031d7715088964a26ad7ae1bd853f26f63b08cfb045d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "a7d8cb39419a28049c4f32acf122d3858c3feb3dcbfe2bfc2351363cb4f6005e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "dc89469e4a949735e14dfb30fe4fad7f2d6d31f605199abdc4ebddacce931b92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "672a1203a3298e5e4a7abe278a17d49f8c419efd9ad013ccac7cc9c53b78b56c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "fafee17bf07c610645305ef1cdadcba5465d85683b74110b329e5f8cfe9edff8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "bc7b9eb019ab9567bded36ec044878180c1505550be23986cc5f5d2b9c0ecc39"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "1b87b92b4908eb9775458b92f15cd1e3db25034f916485d2d7b0e33caa022a8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "947cb8eb637fbb291a3553e67d2180b4bb1d2f1a9b3dbdbfc84c4dd27d086f61"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "7ce68d07149cceb42d6f55fc8504d801a9225ef5710d9e69e7a1a3c2a08d5b56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "d3f20b1a1e2151c97b89c06fdd867744f20fdb6437375f52925551f565743b4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "b414fc307262748e00be017593de112c5627283cd22163a752edfff2fc90c7d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "53acea86d395dfdc3baed5ef6bc8e7936e0f90dd726b9ebc6612a976b39c8f60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "864e46d2fcda45f4b9fc1f9c1fa33250dc851a5f6b9ea9eb4d2ab8049165a28f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "8d74c50129794de1ef6cde980c871214199aa11026f791b2d782370de5a3dbe8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "c79afd16ba1efc543d0648b6391a2e616b1f6786d3d262dac13965fb3b06bc44"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "cbede312bad8cded224868f6743907011fe2ae5e532c49e1d689ac181cbc730b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "49d8e8812a97a18a6a461d09d8391604839eadc995ff819c525a0dbc210f5755"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a5e68b7e2ef5d2ab20293a3f166a02a6003d620a092df0814f244e5afea26d63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "d94688f83ddc2b277e5da64b544e20e7e152d7563d98b761e398842df8b1fd01"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "a736954bce0a5fd8f2cd8c8f8ef2c91f4b93cf98a369e5df524c2d6ea1201fd7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "1996477daa0416f4e840670742fd854c2ac780a15a22cb9266301f597573db02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "54a3d44ec6fcc947622ce48877cb91d730955c8cad836ab81637bd6c83e85ef2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "bf25287a30df8d8d534597da3de8df21e3f058a66174ebd9d4e8c5e1fd4e6358"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "16ca0c19977d9f1a716f1a17510d5f02c80800a0f8de45d23fbcf40c39baae13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "8621f1bf3e36d3ad410f1943548dab85d176851bfc179570f5161a28587f9787"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "3a817f4f296f448e334de534132c58d0bdf22c91810de8fce6faec9f3bf9c717"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "83206149bf198da4e5155c0d5852b4194b1815f401b005e5d25f951a438f9fd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "76194af6a4bb4dea03fc7c6c68e763c23140764f309f42e2bd45763002854468"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "dc38707deee18126d89fd6815b8145718a3bb6093fa0753778d8ca48c7a3b127"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "f6a2fc0521be5d7ca16bd7323bbe4cc97ee621aff614dc1e5a99be664d71a4e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "a8d0420f53ef608156ca20b11dd9391e30787deb4fdc3ffb36abde3aba7d7f13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "cef4630a3937226e463b17192ae9018d1df5ed7590fab7e803501f9611bc0e92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "fc0c2c3493d5b91908171b62de6dbd7fb38239ea4bbbc021c813f57de8c10ffb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "c21b2a182d083d1a63a11371db0b0d1d4fd812a7c86dd00119615b32cba768bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "a3066c37644fd7a8efbc5bce929560377d6be30d65460acf88dd5ed6e5a973fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "0831b21c65371c68c6cd02e1969984da8e74864ef0e6f17cc6ecbc48cf9b3ec7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "6e6a50673b899af8e943773f220ef458df7bfd42acd5a62e23b9f7f576ff3f74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "156467fef3434f1a9d04395f93d93f4896979dde3f9b5265b731f8d850021bb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "171d712e88937135b8ce9117e0fcfc88e5165d1f5e29f213a267bc92906ee6b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "2a62eb1b9db0e8fca7d4dd85db16ac03f482069a1c0c8368c4dd9264dd858bce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "4c092b84d35ba70222bd0eaa8b702e0be84f79538f7d04e50e592217628eab3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "01aa971e86f53d00e8e75d78d7575ad13abf7ca5dcdec234c5f146a8810ec832"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "a47a27de48dfd21cc7c7745fe2cc73eef94ccee4033068da2666606e29c5ceb3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "b20e656df1aac2ec170c8775c35239de059d6f3fec3310ccb786fe08ef469536"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "edfbc03742e7849993f22362fb60d7386f1a726f9ef7b01231010c58c2033435"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "7fd7aa805058426c16b2e837e9de0c66067ba122f6ace6162c15f9037d0abfad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "ea76fbafaeb4f7cf48e7daa0d45be49eef72933237e817dcb6ba779e504dc3f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "36afdf426031d284c274211b39c0aa406e4f47bc91a9dc61e714704e795091eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "76433028db8412431408f31fe17611b9ae61a3b80d641171456d3bec6e304eb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "919dbaaf165d88001512117ec60a2abed5aa3951505b8cf3edf5899a2ec39fb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "648a752bbce7bcbd8ed18990b6e200777bda089eb545e79668615684b88690ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "0837dc4ff8a2e758ce5137d945fa27cbbab22ecbc8b5a0e923ed98ce83cca188"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "03414c388ae98a9c69bc2cc5f7b1c8e63b132fa26ce12e765964b0b976281e44"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "d55d4f55fff7da9bdcb0478b307d1a0467c04db44a382ef0078ef9ceda3e59b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "bdef01e4dc8e1116b83e08f8198b2837c24fa3273f523be8a4534a768c9f7291"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "749a8d7c69d39228792db9949526a1702d3bcac451f38539ab259f95039fe78d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "52871a7ea2e580c081c5b953e6caddcfe004faf5ab85599da65f78b9bd443c69"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "c9d94aaa24fac66c00940dd0c221b44551435ebff6b173a776f30b8ab187ed3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "26e88a333aa6796034db1d1a83037189488a21a5259ffdda679171b461e88c26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "84f0352488e0f4229f59d773e2469cf1b28be22f7a5b99268480f929c5f18be0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "e6fe8405c52c7ecffedb71d5c38b1194964eff872b3bfa2d3797e716882a8964"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "42ac4f8300fdf131e58c421efa37b67ee29bef2c7c0f4a31557596a9a69a8d42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "1e2445670ddd932d6c4c28af2639c5dafe91bc95cc9c5349f003fff6f9b43f21"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "03cdfc6579f2be2db888d0d163841034b0cf9bb7e1e05223e069e8a7dea47bd2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "1f29fb505465bbb372261364cc14cdc70fff7f33ecb4a639d83840b3e1e14b65"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "9d8d65feb3025ccaecdf08eae0b5162d32988034b9978b9137e6d37bbe17be07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "b2265d0aa7896bb6e50ba94511206f478553c1a4fbe6c2bee67c3e52f21d987e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "c67be5b72aba7528cd375518408173c206e23eb3b80814dae31e3633601b5e01"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "a72b74259619391be17e27279c73482774aacda8037cac69777a71fb20a88b35"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "4662c3fe50dafccb68c0e4b76128325ddcdb47dd438ac63c1e49900d98686a8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "a485546408c06ff517d773167a6f175fb319e209cfda1cc9f83e01bca35bd083"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "3b6b951cc32c6e78a49c07877a6bd90e14fe52c0f23cfa21d2479e81ba54d63a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "fadbb849f59939e8f1f7f0df3c748eb57dec08b3c0ae253288e124dec6dfbe20"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "1246121e70eb91e344407c8bb33de739c2065bc3adccb6e9526901f75408af57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "a5ec00107be4910b383c3cad7bdfb64024393c0deb6f0dfd1d611eddceca1468"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "c0b172964349b3e0a81f11293f7b8803dfc28e5585f29a754918946bc7ec0219"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "2d791ff2cea6c7fdc6fe355eaf7e316d949244d77be4994d759195725cc13664"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "d62e062bc12249088c3ee7d37c13d3a726139fe3677109d247af6fd374a1f128"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "2f2c8131869999cdeda30114c01b5a24aeb952491d527068c04e3dc2fe759456"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "41d89ab4813bc720e9f79e98e8a18b883f9df2d32aa2347049ff8beae53da019"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "9c60195feac399961029b11d840965b1472b0ce281623806aeaf5f32c8fa2ca7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "a9d9cd3fb510d43e48206face0a2220f833c51b4c905b4cdd3819783d226985f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "2bacacfe9552e0c3adf6cc302450c59e44d56695509364ac1cf9f23fa2803aea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "92cf321db576ef4932f9f6ea7b46577308c83f12ef22c1354ecc163daafbc655"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "2d11d759e36b068f506861f7a42e310bc3548e4cdd6ddd36e63ae79c491d51ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "9a69a933d11cf2c458b45c66d5cfa0860da8705d9cd4cdde1dca969ceeec5b70"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "77c8779d96581401010c04b237c15c8a397878b7fc0b02e7f47d4cc1c89bd4f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "358760ed7c5f8f0ffd638b8295d124cf0b35f123d38d9ca6f8c5f1f56b4de802"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "37cd8048035a4322f1ee887d47f386740ad37e8285de53ef2d66fc2164befc72"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "fed145568c19f75a64b2244bbb9bc6c1e78188d64623f031610d53eab329a5c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "4e6fdf757dd9ef188eec1646aae743adcedc8e9acc9fb12baeaaf54adff05b8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "88a1068ddfe8d82823d720f32da0a47fa8fd667e52ca311449fb086ca2e3969b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "d68bbe9e16548e33548fb8561d13284090a715017213c703849b55afe8af67b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "da2c29714a56447f7e8f9215e5f01e8ef8def72ca9ecbb8bf0f660ce6251f74d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "363a4eddd4ba1ad762ac12eb04561b28ce841fd207c0cc18ac4302ef1bf9643d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "f43edcb8ecb534d325992348855492ce4fc742ce65d98eed9dba045065cbdc76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "fc95e3b7409c59dc10d8410d5392c31c0e6e145e723ff3e04863ab91c97ce06b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "bd480b47002faaabb430d5d915dcdde7dedfeaefc82562682240c8b2ee839ee7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4fbe3561a7fec12da1421381335989847bfd50f0a2a176b3354aa08ee7dc01ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "72fd9266c4335a9a82e3c334204e5cdc5b9a6d04b50fe64577c92aaa7599652c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "753aae00cb28541894a7213b02fa6543f2d356979a2109674352a77ee7f525f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "b968763a8143a259682399a20f5e6b25a6e54487a25ab470342dd692ac695d97"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "048f5eac861d04506b8baf9ec9bd4d6364b5162debe68f42a0aeac4dc8e9756e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "5f61d1c05ac40f2b1bf5a5d2e8cae129999d01e4e1ceae195fcc0e1ee62721fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "1aedf79e54aa63e6bdc1c6a57d50e61d3a1277ac8cf65d8ac850d5d25bfd0dca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "73c59ff4fff59868e7b89f9657698d1bb2d44f82eb8a4e95b4ef926711c05e9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "17bbd0d5a1819aa3d91f69a7a2f092e20177c2976ea038d91a6b86821faa630c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "40e4c7efce229588962c1a53fa47df96ae9e6575dc479ff47d9cca7350a62385"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "36f4cc6f87d21c070172b196f0f66b981db5693de3ced056b4e640dcebd7890b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "cecd3c60b4d7bdefe297b7ca9fabef3a2aba44321272d89bb2e8746dfea5015d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "2f95ad33a84492d06073dd8bafc2ba8f187e22dddd20a5b9c32e44f5a313c9ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "5f510b338b6e8f42ee58e745d1b558b3e8a185997090f8d36c1c2ab4ae09f05e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "0bc06c962a07ee0737aaf82a7895fdf81cd40240a82e3a68bb0e663ba76b7445"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "88b975dbb66a420d3af98c0a947152068172f8925146ceee71c942374229a5b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "2592cb70ad681e0a413d0a3eb97cefd0ba0d6cfa4521c153629de4a207b0036c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c77a09ad38888bc852bc624d33ec6938dfcf671b6ec4940780f24221bebfdb56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "5bb85aa6408fda0e070c6dfeae2f5b7631ba8e7f0c20926b84fd217b2219781b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "805ef9ed19e96d8d1c6190d9bb719a3bd7b603068a4e83e0a0779ae7a6e17377"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "1641ae5278b28cecfbc29ea0f16ec81fe4c0b20b02ade90ce597cad9475ea2c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "f460fc5238995a57f55092955a54f307c1e85533e7715d2df669a3ac51f61a08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "11ba4fe1a04f0aace6a0810d1aa026262f5d2f0e0ffee8329d979b9e46210fa8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "e2064f84ea72bb14760c1955482eed95115a05a18b68ff5292f6446f752ad364"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "5e0f63debcca56eddebc78c5a75df0d7d9b21484a4591d59ffd75e0460bdb6d4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "3cfb1a7739956c321bfb8de1308f52cc0fa66cd4d42510d915717d30a7bf7fab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "7d5ff6820caf95eafae946ea82598d5c0d2bcabd80e948da1c24e557ea9e3656"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "5286356c8059804b8565070b8928a25bc6df26a6f5d724c6187bef1e5d44d320"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "047e361b57bf7c07299c49a58342bcb4d67ca52e57469b56acf14803d7e8ec93"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "aab645b24c03f66d2949556a0221f59948726fab05943713eeb7c0a3a6356ab9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "0c18c535996e657a46a2b222b7c9a823d421477889f5152b64f9e92b4b42a474"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "fe6f4d391b6b7f3f5ca661a0bf756b09bb5e050f1edc2072aa1110be48698fbc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "488d521a809d1aae69e97b9e492e44eefcad6886970dda048b3ca8b38df8754b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "3024663af67a4b975bd669b3fceb008f2682fee8c3fb37e84522edbe92271bca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "cf14db165c6e5f90ff4efc34320388090dc1eebbb4eefaad33ff650a05ae9012"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "e4699331986cde70ecabd545d2627340e908ec36a42fe57658bf8d074b8a7f31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "c20a1da47fc76b6eb4b40d7d6d92ec3dc6b4a3de549a4f0f4447693f52ac8da5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "b9153680f7e1d8f45a84a971b60d6635e50979cd84f13622b0412d15d45f5ab7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "c01c9309a24ef0483648702960df2c8cc206b4209a5788b45466cd2dae98754e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "de5f10a49d0697f3275f6713492ae24505c714cb132e3b3a85021fd609fa3927"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "ebfc63947e6e821d6f890e6ccfaf30bc58574fa4852b457675caa9e32a051e02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "e2e2250151474000b8ff3eb6c7039ca2ae6463d7eb4d19e1a54540d712c7edd1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "3ca76c48f7462aed4b800c5217c700b0a941dac72867d4c9ea6585c957a28691"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "392763d175ed269ee194fd958adba3a6b867a659ff83df20e15f767f3461d5e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "1cc91d9988f5ceaf00e75736e4bef7b7e3ade26a92197dcc28cffe7bf1f7e3e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "1fc5a057e50e8b188f53a12ffe652b4f89d9969c2551db0e1b715e475806ad76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "97470ee5b11d0820bcafa84e1cdd4baaf0dfa559ba529e5de6bd3db1a5221221"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "b4a204c2c3f08a4c5b041af874ecde3d048fc274673c963799b02821f52ae8f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "a0316df9372e3d632156394a1072757395ed8fe7d4eff0c6c97d8f44a79058b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "7fa152481a8c13f5a80167e5acb674f180c36c2a77560c6034ed4f976131a1f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "126f4d4cccc148553c0b6aba19de2c65542ce82e2af550b9aad73259586be2db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "ea78b4df0b872cd8815f8cab908a964f8923e6d6238b63309fd853d01714c5c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "89f875c6cbceda59718908416782e88f0b2b3413a0266e7eb3c4a53e1e2e4e5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "44ec6425d770f4059efdbe02ff561a2fa85ac784088192f26926fee5e46a7cd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "a359d9b2e1cf1f6536417549f88e4ee95fa4653544c1513ea8c93c4f0254cd94"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "d2519cb3658f483f3680396a5ad667af387d438c5d70cb0e901920a2ee3b7f99"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "c933c4386d82f725b1bf1af80cc4e22040122a5966c808d7e49f4ec18935bd2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "574dc862cc657f099b3b2350f9ecca8329bf8006949434ff1b6b16922e17d921"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "8c678b9b785031a7385bb06f10a74211e7ba3f8b5015918f4228f8a58a7e2043"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "45407a9d361c199c3567e6c6e6f1ff3f253f0620a70a5fe78680b726e6d3b437"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "66e2b4cc6adcbfeeafea9467982910138a6efde30261de005b06db18bf37da94"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "05a56a51d263982f2121078695930934d9e62d1d9cf87dd98f96f68033e8e269"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "cd593f8954bfadbf05687ab4dd718bf2ded05047137ade3911d0621abcbe482e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "6762f66d40886f5245747c564a78035d8d02cde01bde934934a7dba1409622b2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "a62d3af063cd08ad5def6ea2700ab6528fb799e037a7a8e21ef2bf2aa36f5242"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "7c54c92cbdb3791bc102ba9089b71cf8e831e0306ab24e0a3f4a71125105a86d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "0ae0177f253cb5a51925430c3f5ef796627ff83a308e3b189f3764cfb98c90fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "fa4a63fda67604f36d714f07885db42c41ea9220ff38c391a9c1a8fa85fb7999"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "ff5bacea04885d43219826d12121c3017e355ea1bdc82bd76f9f11e0d8aa8044"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "bc8a53b28874f5328fe64ec2e0e524cd111938edf0a8c38bda2e5f7ca67b4130"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "bb9006662cfc2d5b7d8f88b2961785e736eb8bf3047f555185dd9163a5c21fb7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "5ee46adec41ebfd042953176884800e29f26e9dd8887ea1b3e75cc6544f2d725"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "b5a8bc6835711d9fcdc3c53678257a2f01c564a13dfaa3407be56c0b9d433c3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "60a2e3979469ee9b9211e20e7d1a5675c587b636543ca61d992774597d57227b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "ff0ef93857d10f839f806c447fde5dbdc57dd923a099e07ba62299257d0440f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7cc3360ac270fa9c647a31d3b5e5ffa07aa7ab10a0fe2ee69ff2bb3c7386a674"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "c73e1caffa5a6e93c7d2c816382c2948367b52270db4850fa6ffdef8e882c237"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "034750addcdfe3ec1055fa07e5c958ddddfda9616819bd0a15aae57fbf85c5e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "0c55517334aeffac9d67459aae279446baabf5c9e6e7f3d887d661a8a0af11f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "09b64d65d565b3f549917825d3c6e4270f04d87abdc22e0bfd4a4865b87f8b62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "fbad3df83353318b3dc86e9bf9812655ebc1546f242e8a4129e36f2e1b2f1ed7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "428c9bd658cfd2fa12ebec4404d1a499c439b47a5335e86e1a6e15e3f71f24bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "1cd32b1e725b267e8c8df63f48cb874a4a90f918baccdd5165e89a1a4716c168"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "e1cdc465ecf675e7bd9484addebbb66d3314c99a75d3e8125fcc183c3f7712c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "b1a46e73dbecc298827bd05959c31bf0bdb4f5b89562b30afff2e0bdf63d78a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "22b8c826afa60bf2a995c0b76ef3c013392f53e75e601c96d4ae7de8798492bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "422f625b29d7e47ddf116edda2a5d48e78ad742e7fb0df52e81e19674dc179db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "97ae36460cdba402c4446f531726a08ba0d38e56218039bcaadd5ed2b587340b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "774d1434bf1d3c850424f3cdbc65847d3834dc526df00daf429ab612264445a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "937a774136791e77bd27a8b63fd85bc07eaf39537b3e7848c9a0b2c41cef19ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "571483bde78300eef026f1ae3aece3e8f04dddb776e32a1b509c630f5352efc1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "1686f32f8ad5702001156e095407639438cecca4704008bb91803871db70281b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "b80a2f191b5634e0e5ff16dcf28163d1b7d8945b2c09abc82d4ad8378ccfd971"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "8af1857eeb5f26f0238c340c387a9819ccdb82db72d0ebf0a3aa4947178cb53b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "f2d0fcaf551c6b46e624987ac730ad74fb77ef160851d0426736b04d579bd047"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "3639bcec13970ffcfb15cae56cc3a4a51cd0ee5aaef70e9d65613c5e4ab646a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "0945a0828b6a5565be66ab595037bd9489597a0656d4beeab4c6cff2badb130c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "b2440f07b7b19501c4bc7f5ae2c217b8abbede989dc60230d11a63441b431cd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "b879655d0714bf6d363bbc213aa89e3f51435165dba696f375fc4fe0116045fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "b1a2786843b7ae794366cb8eaf8925c1aefb5433a6695b5778a7bcfb30237e25"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "c823658043d5d35f3bee087aee670be1f2047a11a4482f837cfd5fbc7da66cf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "3f504e8cccbc053e3e00c7d031306bfac4b80f8ce284b4f7f0377678c793903c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "c295c0fd5061af5335e799563f83ee088b2a0895e7e2e05f96b8a79915849276"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "aee71dcd14ec4b6f961d148092207798c41122977073adf9868db537c3c8ac9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "e8a0076ce86576c737625f35594421433b4906228a7ef50f5e67bf5d2fc37833"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "337ba4adcde3a1cfef8329388550aaa5bed26114abe0dc85219bf0d6e3fc31bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "ceaa22039a4386af6c8f7811016dbff134b23f52da8c91f79c23e0465796a743"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "b5799cbb60443f9191a767649c2c48fac65e1f3fe63a75403cbaa11de73ba89d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "f03be58ecb56cb805d797ce9ab562b27917df7a4d5aa888378939695ab790307"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "c44afaf53f7e3d8ec2b655cf82d7b73aad0a2b646ad59177e9cd117630aa1f09"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "0b978244a154fc53e7b2d59058da89a5ea0cbe6c2f6e63d5807f0f80cb5f02e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "185362c3e6d51246224517bee5bea212d5e460a8f5e6a60e593f5df65edf0c02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "fdbed4e951b20b1c87a93412dda5e060e3c25f215b929df0d08c5906ba642f88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "15756cc103ca7c3a57cf289d3dad1fc901dcf4730d88d3b6640a67d70cc6b37e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "079a79e418c61ed17bbaff48b4b42d40faf3ee89daa7f48b0d91d48e1f5980c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "80b76316230e7f335e6dfa0ce6e641578d410a117aa08b4060aee744ede50986"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "e258990e60d16dd892f771e9cf4785e23fc691bb7575cf88276e875b43ef4ef1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "3545bcd6b3ed3addaf9b210ba32712f2896cbf71b35f256693909b1bc3c67f13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "d8d11d2dc7af0c818466062dec9e8d601aaa8e75110792e7b71af1d3d09bfe7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "932b29deeb10c161f284e7bb1dc029460591bffcefd72b37a55f958a577b98ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "37ce38f34567165f4ef1e0455b9f2bd7202c511b3978227e24c8281e7bece0a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "1032de2f2911ede625612e9e3c8186390638b9446e5086e68720bd432be137db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "02dec2d71252e60982f9ab77286e22f6d2aef5a763121f8e44c6ca6acbd93158"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "c7d56b8f1240786d5fe3ebc46a0d02c3c8aea144134612fa0247fe04d7663a2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "b28828851dea5cc31cfa8b1375a311e97ef63d98f104dc4658e3aeb01e0af33e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "2969b23067e242e169f1c605f9cfccba83a4917bc4a19a023b4cb4e1bb8bc8bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "3e975e1f85fd2d34aa836841814a79fccc800f3a8d8fe21e3a251bafc3c3c5fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a6882ab3811d99289338b5aae4f757c86f73f3cbea331145fbf42120fde09dc3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "c526c1d4e2956e415ba211930a883b0037856231934346c8788f23c41c955eec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "55ca66e8292406a5f9f95c0117647baf85ed6bf9909f46d320b64a9b690ecf84"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "fdd588fd5f8e1c061202d992189547048779dab0ea80f1910ca92d42ee7a2e3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "812f44284800b4a88a4826c25799740c3d14c5f0a4e6b5e8cb7f9f4aa49563a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "f1fd0c2ab3b6204b9566c0d467da3e50772e9625ef8d50920d341c7f5df1c796"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "ff3575c178d224d3fb4c277e48e92ca38ecec544eb31a3bfbacd839170d3b90d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "0e2adfc1e0ee5bdd802dc5f3abdb8fdd118b8f23b6030818ee9cd018fc4f32ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "0958083c57df548bdf30cde736b5a29d3c232bde49d398e1b4b52978a6d7ede2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "cf8f9cf1fda21fe8d10a5cb286da1c46b5053423677c5e2f85a7146dd7c0923e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "5f1ad7062d9057ff8d5fbb07cbade94100a384711234f661612049ac175b0fd3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "87c970b15ffbbeadbc05f7132512f8f31ca284423a4c23c1f484e69302cd5aae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "30c31dd94799772b8cdd9c2bf270d1b69d2a4137ba0efafded5c0f08b754c35e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "039fed797950d289cb6c68f4c476b7950e2f690ea33da9e9935057cb4f164ec2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "d715bf6e869f34b2135450c46fd46249dcf92dc7634a12e8e2d13e8f47b18067"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "71d8be88313a83b78dbc6dcbab70160a709f697909ad32cf5b5505456de5470c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "b470272311b75f49a965406cdb3d97f7f926d8e5ad117b253a883ced57d2b562"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "2483e4eca95a540c1cbc01f9f48043aa22ae90f13f2dc4ea7c890095f49ae6d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "5e1fd8da1529dddf69a63b95d82e4eb66a82d13b9517d0b9441af33ddae12b8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "a07cf435b56ff331302c86625f84aa7be3c4b53c1536389ebae3f9ccfb64af42"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "b2dac835c198f7d3d731acf870cbb407776e1514750869c38024b597c7eaefeb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "601b2c418b9c0278ec903bed7b1509364cd28ee1ef9486006e0291fd1aa4599a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "520a0f57818009f2131175ef2204727066fac958145f6c19d877e1c6d24c0120"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "0aaab0da6fbbde848486e023a68bef0ccaccd0e18169e768d8115fa850e2b211"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "88c5a4a62ae2924e57be06deec68f2a2e07531ef33e104917abce4bbf273a1ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "4d3966caee74d04790a5265a46e4404954d14aeb751326b817c723d5e1d5408e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "b384491575e9664cd8008d7e193a72cc2490356939ed9054b24f4a72baa668f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "9814fd6a3f9e854286beff5c54761488de55b5b7fd8459e11c4811807f8f39c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "34b5a515fab401a5af250add67483cb3781646e7ac7019752101b68548c44b21"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "134560ad8b233e17637ac51fa3057ed0fcf6a8e4f8e93a3cac6466008834b1d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "25a25bd519c5bf2ab589260b8f69db6c972950ae55b201b30a98f560e139c793"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4d62f755e637b34ee20ab50310a79c39e95bd8d4d5bf6f34a2eba33be11f558e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "1ffd050a6ce0c2d2c0b8ea657a496eeb06f54534f2890bf701acd07fe635e77a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "87962734e77f4eeadf326556c2c6b40ee8b0a409c86fb6316e3e075db264326e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "6093aa27e894e9a968f81c0a30a9acb5cfe343c00e35afbbee87b1669f5d2a8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "e3a552e06cd831825e50ffffd03c812723c26037d7dfcc57d3ff16083103d35b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "88eca61277dc33f9720451381085a4a7b1c7f4dbe784a1dda8cadc6e64a3094b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "0f2d451710946a3c0abd191eaa76ecb1b81ad2db7428f4c9cfc905f1c93d65cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "57f1b1609aa6691feb960ef8bcdcf6509b204eb0c7a89f7f3f7ad5d48cacc689"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "69ed399aa855a153b70cd80e8c18342af0b5c3a52f9e9d8388550813ad8fd5ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "a1d417089c77d7218a9a2cdbfc3ac904e7ce79cfe159be0026460a238bc23d93"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "55336992564979bfe2bb92f69cb978ab005d370d4a517c01188b0d2da9181170"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "6943248ca63cf5cc5edde807619bfbecc63f5257f8e795e4775baa75d8df6ddc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "8ce54e94f9712235f116a365f0258c2e8cff2deabff88da6c73751c10ca2cc30"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "5ff3b29261fb06b7da7b1128939aa66652e329548ac65906f234231c8bf53dbc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "f487e46b85b2fe00fa165e1936b669e42a7056a2ef1b9c06797cd5cf575c965d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "4aa5ebe2e5eb96d7a902367bac809e8f096dd124adf61efc4c01c4e60bd93199"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "9318558af7b6e5e5d9fd50918588a2e0b414617c93efd45415be87c92c06330e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "8c88a35a36bbb72cb40286fc801fd85a47a8123a0f9dca70710ed3422187d347"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "0053a657ece4f6a551162e4eba093d566ab8cec2ab6fffdef5420ba4b8e51566"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "d64fc2aa564df785229970a0c97ef98a70dd14cf9de22a1b4f03f92386b090da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "cef30942a97cc4b31fb434c60aec56e2f9b6bf730705ea4661f79b9520075bff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "ec2bd1f0f809e89b1e74bc5db6836cc1ded1a042ad63782b6db889f9989ab346"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "fa195e344caa13d63d008e8e6b6f62de4a23aea581ff380515466e9e1cde52c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "dcd624278844a672d1435f7ce218b66ddc7f91273e03a247fd76962bad056ee4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "2f0676c3e27ceb16f15b44b2aa55f8f33466c3c3011314627431bb24091d3b9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "6f730f02287aaac276e0815da8fb9a0c62c87d47eb89fb41b340a3b57d0c0bdf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "ef01f16f8cddb1444fcc52f741756d4641651e521f0338f160df5282c4882c6b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a8a22d28377802c134e9b093f1cde53c0015bb726c1b3063ff96997437c40b59"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "edb333bfc1563259239a1ec2f1efcf3ccb2a4198419d77c329482cd44ecd4360"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "70761b774eb83ce9ffa0d9d55190efac11e0b1eccc32fa280c7ae9afdc102740"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "530709099167ce89eb04f5edbd5bf3a9fa1f6a43d29cf0c6f0e19902b6885c32"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "4950c5b497bd4bc054eadc46db1c1af77ef83c139804a5efd4f92c093dca4eb6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "a9419c3d1432ebaa246b9699bed9929ac92105957b45c18dd6a121ab6a590b03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "15047b6b1823e9514b39ffd90665da2c27c77a609d9a87bf0ef4540a49f0cd7b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "5bfe9411cfc9917bcd2985071bfb1042c8e16717f89ad2affade3ed9e620aa52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "567a53896bb30551fb597efca213723d7130379c4f08738b9ce8bb7c8314a6a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "5c179ac82d4d62257b37809cd9c0372ef6d00409dd749e6d02f2d6d87b46c899"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "999aa95db564d45597b1d812bfb3c847929ecd7e5040216cd14f2bc458e1cf45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "05b070d0e6a0c95ab82f43298ea226e8a44bcadd3d5953c117d8026ec0bc3342"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "5c43089b2e0d3746341af07ab9fc9656191c0a7ad77985eb2d5fc6397ec51743"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "2d0e760c7585eb14e36f596213bfd3e5a9292d384ea75d9d83ea5004eabd9996"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "1c8ea4c42ab43b376d572509fc8afdcdef96fb05cfaccb8fb2b4feda8ae70311"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "add99304b8a13bb859bb29e48dff6f2a58d7e95035028d7202d80757aafbf56a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "b574dc7d35363fc13b0f5c72e1f236c4ebc937f53ca123333e279d583777013a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "6fb35a6799ae0796b1843b27fd454874d85f11f3f592da7bcc571adb76eba3d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "cffaea237d343845eaa1668852b1d56d48bf74dc2b88c9f1f1ae11f93916c3e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "370e14f634a88b1db51dcdc60546fc531b292d6c15851a31ae398163f2d536f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "cd080e91e6bfda86d86ad439743bb3342c9c9c3d9fc7af8b2fba33986fcce072"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "5655e22ee0e66e59070c491fff8a5532681ea7b25172eb31fa542d622b2bd114"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "3f4013d3aa2660be49133fd597764a5a83feabd5b8c878b34a09f0ef76f94751"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "10ba3a726b577072d8d52acc6b501d2e22ec638a31c5c3c1fcedaf27664d1ec6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "c8535950db414d02ee0786246092e8fbdfd8901170a31880f592c278840b7945"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "38355a5efce5541d34e30fa39a7afccd397186c890c4be70da784c3ed8f172b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "b1fae6d6a2163c0bad2aa51822a09d71a2a31bf6101be15d24b29eeaabf4dbd5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "9aa74c14d63fcfb6357d5c6692993c6ec3b639100242e0caed4c48f8fd88d35b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "b6f5e788bbeafe84a588444467be3c83e5d1396c5ecbc78d2e807f3955a99a55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "def5a4c33a32f75cefc4405f7577125df3051599a7db646c21f64bad3ae6b5ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "c3f09d14a37f02e8c0696cfc4658c08c6af154d9c531d9384c91d9c9ad901dab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4d0a4fa113bedf002ea9da23f12bdcea8ee379462c16220aad866216bf731b83"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "46adf23f4c33fc7607f5659e7c363b2cafb041a09c06d619560a4cda7fcc5b91"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "88149476a759c5a9c67e1f5ae618450edb02350ff22dbc28ae8ff25bcbda1f0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "3a0fc6529ba5b974998da3a04b6e3f29b444616e76f873faa8f0949117c0bedf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "f84b6191f8c221ba121f4ea4319c843b1a7e1549500f1f5f3304ecd5dfa6f43d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "8916a57d5d958cfa525b4e6f708a3484431e368923a7371c4b57e7d6d888dd49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "04b5028e0bae33974b882ffcd86d54bc93ee1e70e7097670f4c0ccab605525b1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "3cd09b20a051ab9503a3a352439b7d764cb171bf5a7d0ea8d9508e7fb9217542"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "aa49d7976909525c28a3d174aba277c102559abcb25a8b22d23c540727222959"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "d8893604d76150f4336b449013f458c56d7201e866d0615e18fdc2ed70255b44"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "1579ced467dca0e046c322f9fcc0c589fb6a8a92a99d39e03a16f9742ff19e83"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "e49b95538183ba6ffdf1439be5e422459fe319c4a30d391828ff5cfd74c4e050"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "5bfa616d3f329b2ac3ce64a485f3c7aa8cd915b0568d107bb63eee73875ff0ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "5a43344d1613dde700d2f8d773deb93f31cb635f86b10fb275a06996239c2cc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "66e885fdc99adfe6878ecdd6a4e6ff53767b94a6c77b915feda81829cf154b24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "b570bbcd601e073eaa48238fa8253442d8272ae2ceb9dbc40e1a6cebdf9db32e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "738e6508f9826e1c02daa3c3873928c5cf2edcd068b52047d01bebb3d6c2a183"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "de5c764f7e6622721f7d4b45955825c84c57937205321493ace4dfdfea9cbc8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "cccb08367fddbeedddd68381fbaac63ff23b49d7ebd98ae4531fb9332e1f6eb9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "bb9f6f9e541fda56c028210257b7ab1e636a7bc9a3c17e899addaf7f3f4f9279"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "20ee924f9c7064bef06764bf61fc69ec898deeb0a6fe16d72adb7129debe159e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "96729bd5f4f7d52d23a1568e835e060ad23c7d52181b5c4b56c5a7b7811945e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "f42994e686bb696a539ebcb6c59e20363cf5e09b6db29b211269e751d40abe19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "0af73f6375cf96ffb50d8a6370805fadbe7ffc3fed7186aed998bf21a1913183"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "4faead1b631335f183d92cea3447c522259d6ee6d57660f4698bf20f30cf0d8f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "1fac5284fbe9b84c52f2a9c04fa97b4d0ec64728f6e8ba9c789e08dc30e11e58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "b8335667133424dc1193aee9fd567d2d76f386123ea672c8b58f570baf6c3573"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "5f253fca814f23c3bda04b765c88bdcd6e18c3f08d59aea30d5b5bd557066f04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "1265a2fd771d97f715f0ef5e24f837620f4534a1939ea01a9231c50219ec564c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "b36bdfd5d649d6a492a87257959af220a459bc9eb35299b56ae4c5748b1086cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "3f5b6852fbf18106b7200cacad20d72dddc0a3d0d38c65ad49bf3ad70f6733ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "fecbf75e7b2e0550c88cd0572043847bfacddc75c0d8472e67f0c1bf457af74e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "1b9472fa835b5cc559e5f823cf1049f4661f49c7da6f1a316ff8cf53f761b058"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "fb94931c82bbf648d0556ae97095e98e82e20cdd9fc846759bfd275a72dc54b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ccb6a0874f9c809b0ced24dfb5b72de3d4aecc2886dff814194bd82a039ab823"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "19e278aa1457d5a7fda496d98728fa63b3c5947c8c34ae50eb59b67f58e32a82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "525b1c24c93c36d5034fae9aad905ca187bf2637eaaa8835c00ad23efc75fa53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "611b6caf7c38c8a91fc4948ea89c15cbbcef18a3d012ac0f210a875429be910d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "cd0bcb2ecd7af5d247de7d0f59238dd166534a4ded1714c20501b9ded60aab38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "822100d06229d2757701931016b6646f79c191ade1a88ebc917bb575bf071f2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "437b21888cdf234836c0662bf19e681424ad2ad854286dd6f1834b283d9cb453"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "8535d9d8e563dc5e873f823070da3dba95157fde0a0b615821f8158f7527898e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "3d8ccbcef5d69512cba21f56949eb55856c2cef4163e81f287b70d9d4b743b0d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "b8597421a266e7e90ca27c51b2710ecbfb1ce72d6b61f3e4d0d700699efc6be5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "b681a1110d3e6fbcb3e85c75e81a7aa4cc9c628b57806546071059ea3a09974b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "1fe850cf98fd208abe1f020ae4987f11653432385233074bec7869a7c5ef59a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "2f1d6a6394a67e33b31fc9501584511643eeef729665536e4ed602cf2619e187"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "ded45848d3c885ff40972d3ede215e815dd520d6a835a198ea63d983d844f0c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "e8406214daab27d969a9b75ee0f97a1bc7207388c25b703a54bad3536eb5d642"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "d010c334e859052b66fb653efbe00ae53568fb151385aede8ce7e1f6d32bc406"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "97b1184bbcc877c7eb29861132e1569952d298a22efb48e39abe5e529b78412f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "fd40aad9f4c9393205d65dd0876d4405d764af06e93d6f4630a96959e87d369f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "e21dbac52d416e8b891f6b30574753e247941b260040fb8eaaebf203354e8afc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "439ec12825c85101d18ee9d6064d9439571b9549cd3b73db9037be8ebdc3c2bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "ba19d02a6905c07bd2939bc1a22870aafce9df75cbd021c1f0295d34b9b860e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "6050fae2bfa044fab43b60c270c2b62979cd34249e0a05a95d4e76d9e6c0ef95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "881063f2d6ce0c330a815ddda4b244dfbc020336b54fa5d71232da724970f6ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "cd3f7786d312a120971eba69a5b04a5d8d30040245d225a72e24e4d62e3946ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "d7005604cbd77a01a054e30b43e071d8811d9b6bc63a44997b1e3933071e212a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "ff66600f5f46798f9e246fb4260f4917f7491cd0d292af16e136e3df3d53c570"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "86b5604c6cb5495b09bbdfe38cad6bcccb7fad796c5eb71d6201e0208f9b30cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "ba3b36951e406588706fa61401fdb5c621d19d3e20b1661cea220a73a61cbedc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "da9e9d3c044b37745fe1d7f94404a317696b985090f48c0fbb74fed025551c7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "e4808eed172f797dda945ee738c6a6eda4776299bc20843c51aeb477878e6126"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f05e214a03c7581aef28527124d8ee518ea170943b4b573fb7f88b5ac187e64e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "25eecaad4444d58e885d5262988f075329ccd8ddd4f3994ea25857c796df4996"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "48da6815e2e2dcee34a356efd6d2e9184807211e4877d43e1a13aabfb5512538"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "e4e3498c286d9864ee3657c6e962b4a28251d6d3a2412a363dcaaaf8fbb97cc8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "dfbdc373ac74ab0fad8bbeca558d4dce19738b13c32acfc3be6a100124fbf701"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "096d105d2c0cbb6157a5738d57c40ed3529df7d90c241c88a87121dd15aba6ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "1c0d9a90a1429e80c64559a5b6abdc0514e9508fb396a4719e784fb2886b2aa5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "f3eff191851878735d93e78fc259fe377b986ddfe5b15a9f0a61ec7d2e11b01c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "3e21e79a6ba5908a472a162670787692b628a22a3f4c33024c321bac47d34229"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "1c5aa411dfb5cb55399706dcff4276013ac3c280cc5ae944d567a53cc01d210b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "0b122515007ec5ad2e08e7681a8fcb3253df80404055b9189f9c80f4e6c90701"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "8c5cb0f9535d5f84a2a2fc215ff33a9e7e624c7d39da3128c279c1593f21d9ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "80725978ef85b291f35ce6e0adb26963cc164274134bc65c196049d038e80c08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "ae2c186200223cae5a6f9fc34054b6e32e5d9150734af4b6b14edabd4f4fbf9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "8659ffbd0c2cd703fc117609041768f1d96d879ab8e116823dc896a3cbc425b8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "c51f931c66a42b0a7441f15f922263dbe4435a15d9b2af3219275ac18763f8b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "cc5cdd2e776cc1a72d7be330e9c0cfdea8d1d140c5f16fef13a232995945ae19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "264ef5e9be69ecbffd63ed00765d87419f2d28365eeb5e2372bc424f5dc572db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "b2d885a0403ce6609972939973f0b6cf7de96c5936f00a88b09936f895c597aa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "3db9c8670a41c24ceb4af9c611307d5b18940d24df02e2ab7ec8395d3ddfdb12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "0495f167f9b28da28e6e1fba8ddc6a896a3cebbeb3d8387be78655a2e24a000e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "802dc457e2b5dc765cd97a12f316397e21803f8e612e4fb5bad518b49bceaefc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1891b01c002b067c75456864a42d6a75dd1f63f60e650abd57378196fd1da85f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "45477899644559089411d09d5a89ee6bd2beb57d14454612f464d194b2b162cf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "856293e0d01da8a7dc1043db510b2ee4320a3cd1be77bb8365f2949607c18f46"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "23787fbb20beb5378b29127d63aad155d14f5c311bb3a9d78b827579c5cb79cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "7612162e81690be21b293932801d9a15448fa7c06b9326ea78255a8ac5c05624"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "5d241d6e388b6f5ac50f48b817b892558ea760e284ed2edfe94b106fbec2757c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "b782a2a1b8c05e033f94d5d0c8a7981d081435924568062858f48b27ae950c9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "567bff1d362c70518a321f859b97d45c838f65906b1d51d8cfb359a6e0c71ab8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "48b5a4cc12776e80488f7f3a7f508eed7ec6bf0a8ab322f712efc3138388320e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "38c9129f6bdade0d63136552570289f219ac303aeed200ba3f99894d046b86c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "2f7a8a07d1493146dba47a00fa7534c3fe951b70f7e20621f118998f9a652f08"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "b8f23f9f99e5c6e429312fb5cf9a69a9527123163602d3cf216f9aaab560352f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "fa67f30046466722fb25aa1be6a9af7f916609e40da196f2bc682684cd8931fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "40d80f05463d2023e9e822986dc9d5ba91a1e458372219b29bb23d8a092e9fb6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "a8036dcfc9810a455cfb40ec361432088ac1593f16485050a40a04568a2bd571"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "36d6f8e0828082ea148ce2bfa1316ffad458634384fe45b98f8906a4ea87c167"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "ca713629e6f9151597985e18ccc41cc8be83046ff4658b545387da884e4483f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "c3eac3c8c1b9c139d827ab0025f85f0a2a05cd917ac5844ff70fc69e4ebfdfe1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "58e8b526b56a7f1c443eb2359b94b24e84ec85681bd0f493223483a232d76390"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "fe5edacdfcc3fef47ec398b26cc1a3360b31d6c58e2b0cc61fb51db56ba9c482"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "8566ec2c6134391ca60284410d94f9fed33f256a802d455c041550cc763b5344"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "d0157bb88e287c43a44cbbf4a49bfedc1587559ff429b5c83091fcf9a37584ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "56bc6d63268c6c570a9ecce7f2547a956083c2fe3aa36566e92e70c48947cc63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "5d18de487ff1491f2b3dce71155206f017467a01c9ba9c96f4e676d8f7de80aa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "d7d9583d847a07ed961d083d6d9ead6e5b052888f627bd080ce5529b063f28d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "061fac2a67600e3ca991f6d628a6b671e5cd1e838601270761f0316d642c680b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "cb03d4032f814019afd643149ee38cdc19c2b9881a11c32d93c09e6f7e74d6a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "bd12cde45b6be0056ea3b5809c343e483f0c81110076bbe24548ac0d61123618"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "3889be0c9c3259f9d915349966181b4dd60337e8c4906bfa2c5258c807a2ad64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "4dbd31ea315cb0f745a37398312d9e79509ec024dbcd8f66ed4ce3e8a7dd0502"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "a3e57d0e001baaa4bf8220e95c567979625f6196a4e85477fdea0eb08e4736e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "3defadad5416cc981a069c5dc28c20c89459e5dd558c9f1738c90bcd155e6030"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "2f15a652a03da6e6fa202ffa9a687af4b6e7881ed793e1ce07fdeca60536fcec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "c1fda76f5bc542267e3cd24f8b07904e0cd4bfa3e8ee46dd2cee84a867836d55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "e990cd3d8e75265f3971ff177ad330ec3c6cdb1fcf30acb8eff1455363ea4018"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "e195d0b6306b799290d3da70a118511f1491d0baa8d8427a23aebd38826e3322"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "e35c5b452455d68934d9f5d7367e1fc445ed3594dd36a9693389a5be53eeb3d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "a8ac0d9e4d9484e6900302612b7ec8eaea8b65aae1551fa000709d4c288100d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "4ededf315c4554c202b98e2272a566ed3d42670d5a5340a23b5228c926ea60ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "0bfb7a1f27b9caebb421c0b17a425b98ec0de005f7a4ed0a3d59f2f277e73674"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "8093180e05c28db379bc370530b8395e1bafbfa578d48017ee0ca4eaccc32185"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "452f83567c38da5c53a512e709fcb382e6804eaa944ddc71a38b2fb60f5d8685"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "88260ba91b32b202ea184c47ef9b7afb53e3f5e30aa6288e67f49f60015b5258"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "174a3fa642484ca828adea81f034a2d0f85dc8a571122b9e930ded4265e9e71e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "694571d52789b703081f49d4ee1b76af90f636f5dbfcc3ef17ded5a955d3691b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "ab5c422eaa55e1bc54eeda972cef9d6fdc5b888be757c6964109dbf1894f8d31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "784c57846037b69546648c40e60ec23dd0bcfe5ae9019109e21c6a6e608341f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "ee49268c253176a3625ea9cc9ebf6f0b9ab33722a69f62567dd2b3cf43439cd1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "f7f195a081e6f35457b3b44dcdbf40501dd0c099320416acf517d5d227bbab6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "44ed139c56a4d2ac2dc9ac539ea375c6d1d094ac8e4d1a3b69c24dfaf96d19ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "91c628cde9fee70bae33c1f16e26a11ab1db8af595f60e09450d512edd44dbbe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "4da5dc08b67cee182c584ac3be2c77f5e9503fd5b8142d25c33a1020069099a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "c2855a58ba95504238332e640ca1ba52a2a81596059dd68a0ba7f4f7b4470b27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "e9fc4c564c2c9abbe3a3a7032b7946e207f9a798f88aceb9515ff293de5b0df7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "6927f26ac73e1cce4ea1877f8e48c13344955646eca0714b93e692a4e40e39d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "16595f13d4ae787594111fe49616f1cbcc98ac071e2810bc82ac64db5fd76cda"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "2f4e22863a0ffee5439c0d7e258ac21e5fc095aefab1126ea1cc1e4e9e7b0054"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "54551520b572356839afee7e40a767fe08713d30d7ff42a077e422d847eacc8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "da7745b8add2e40770f9d001447d3b93281c6f7bd0b0bc6268f1d8015bd486d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "04daa14a23022de28dbaf965f0caa90de69d4dedb48598bc721cd5617f9d20a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "e606ae53a6b5bca9be11f3633401f7574d35574d04c63232af291373410231a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "7009a59b528e30a0d9a45a62d3c1a3275e667a2e8dc4f53d80aa1695b05c612b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "f7150d69f288c9c395d785e22be9ce42ba761ecdf28d3bdd5d47ba382aa55980"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "43447276e111d4f7e3b97dbf380ac7e920a8bc37392a3c695cdfc737e7417cc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "770d052fc8b69cb40a239ecde30c01334abe6bd5f8b729c1aebe6472a2d4334f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "36edca52c4b7d76b6281c34eeff25f65b4fbb90bec7cd125f750e04127499f75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "b79f16bb3b82b4e56c66e8d81d83487236c3bd1833dc1a02868e624aaf7f9006"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "d91697a7df4f6b2855368298cd849a678559af91f15335da70ff3ef9dfba601f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "cfb771d496b0b74de86ef2b19c31e329dd903944b1773623186d1f7b3539b080"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "05cca2ab6712fb393b7e1effff01941553046ed4d020d727f27aa67ac86d8057"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "f74a619e76c338a2792a82cd89dfdac61d50daa3965364bcb499f222e1a6ba75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "b2d87f15b3191ed1642591891590e0da0a4496e59bd4494f6fdadba2ffab068b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "952fe2a48f2a8d0640d71c131bb3aed36afe0c8f84f4633106d51261a46bef95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "327ec9a2b45a011561b3526237de2acf58f0dde78711836f36257b54dda8fca9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "4d0420585be7bd5a0b579cad95522ca43aff9a53352f510e741f6b76e03ae747"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "2651fb5bbe50c7a3e2003672861e0cc2dc6fd8a2a7c2d1aad7df21dacb1d88da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "df0ba885a53d4973554b3e01d511a3e18d7855d2b0b02b4c0e871f39c086f6e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "7a7fc6ae4e3d31de391a59f14d1d8114cce018f316a81777664818a0cfafd884"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "d98c3d5ffba1f8480b5d3b7a3775f0da076adb6347765236f40126f09f74bb84"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "31dbc20226ab0253d5dae991581b180646ec9788956452065d9803c787972ccd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "59437f0a61cb419312c03f5bf3a70618a8a458e4709472feaebe822092414e85"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "52c39779f2983d94345db4308e0964ac4456df3604b9a8f3c777d32b221ed933"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "dca8d970d16a886f2298e2ed95eb987b88490ef87230c9da7fa5e002cb5151a3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "c8e1fafd456ad928662e4443e463692061ed311f90530af874588bb48c0af75a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "5c64cb2542106f8d368194187218ba59744d025edd7b9e7d23615237d08eb589"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "2494280134ab3f319c3357ac6e6b0b5c0d3beb34b94dfb82922c563edfe43476"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f1c24a7f35f0c95ba0a96fa76ac3c91e83cd1b036fad160d4324717c67d39cc0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "e97f67c03325097ca33dd45eee27c836050aca9688ce443b54c0504eb9973a23"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "6709df3a9374bfa49af2c3f7911e2d3c388c8cbff774e41fcecdad006bf950ab"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "be831e9606f67fccb37710d17c1c7b5873c127d28f08cb0cc3a53d6129f3c64b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "8ea5a2d34cf67b8565a21b6bd10942dd5e33c1517e8313d7ce7f8977d4c1bad5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "16608ec8f2e0572ff46ac42321176771b7f74f22a7fb358580e5885687db5d7b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "5fa30d6920e74b1f697c052461652db2c96ddd476ec7d4e1f29d3899f89b422f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "dbf8f122620bc57df6ea13f0a3f3abd419f9e971d916bb2a99e7954980122a2e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "8f7210ae6f53fcf64d78a3899fdb20728a77d1f5b865d40caea12d212c772377"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "5756317313aa3d826f911803eb9be91df99b0e873c4e083e9e266ce49dcab856"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "7754adc4b01f16eb67f341453a284e98c089661582230348fbb9d0019c5cd187"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "abe2dd141723ee3f78e59c0b525bf396759a4ca21884bb91aa5265a830785941"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "c662f43a71d8f323c44b2164453bbc336342697136034d48f0d75322e15c7d64"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "da72a4a0470b64e4371986774fc6b80f4ef037fa852c8c96821969bfc2d03ccb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "ca63a94161b06ef284a87ccffa5411f0589d3abcfcbe99e2fcd4a6615530c69d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "ffae7219bba00e14e1cc9aacf64abea9d1acb1aaab5060a373a97a0295c2e592"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "d499b61c849ffbebe9cfc2455612ef356afd9b262cba9e24c0d525f134e6a704"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "38cf8218e1e42e2d9ffb8156299aa3120eb29a693a3e09ec859ed6947cd4f47e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "f39d20cd9a70ef38d0aba2fcb61012c2989104e19c9e969c3641c91fd00c7ffa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "0e1d4a4e1029f0ae7e056d9c0ea60bc75fb4317ee2a2ea7daf3aad9021683cb2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "432c36e936c542aaafea9eda8dea88c3c110e927479545c277820b21cfd1ce3e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "285e288e1abb512b3c2961185eaada07c9df74c6a314188f2334bee4c7d37c18"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "60856f9fca9500cb459b5157d3e38827d1b8ff32ebb3cf7fc1c31484fcbc88d3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "589d7c89b703a5999f88ff17b0dd1b511eff359bc6fcbffb37de23cfc4bb3e8a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "b90a494e9cb2a92edf60b3ef69d1aab85994f2d95a43fe854587d4bdd9dc0ab2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "631917c7860674d0b4f553c5d376fd8d53e914a360a31d2331ebd0c0b9caee3d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "3d998712e4ffdb81144e043ffce8f9748e6ed6af84c9429decb69ec28ccac07d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "a7e98dfa1605f68c9f8e1b26498f395c7463c7cff745bf20093e2147225e5365"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "bfc13cfcd8ec3ff551cc769e173b353102b995401a92fe0e34c292460a1bb659"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "13641f8dd3ddf1e1d2f714e06b4dfdda08c4323e2564f82db67a9683e5790403"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "c755fbdf1a084a6c3900278ede2143bfaeb4e398f0142a347f189d364d00e38f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "f51441e521ffbbf69ceb99b00112eeed917cea37bac55bdff9a19e3ab965f631"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "f5114d5257023748c43365028827f79e9b894482e3b8c772494660a11ba1f4d1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "48c77653617e1c94920b432d8847b6798c82d8be7241f393da90791947b1f909"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "37f1c52560ef412e207d490e99c2c9d7c422bd194a1545621450c7382a8a61c3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "3c583b4c50c6e6401857382e5d0e3f55571b22e5a7d6403f61f9448fc326409f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "248378c76843562a37383563151873fcd2cb75de589435d4bfd0270a0922bcb7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "4fdb8d495e57b49f7147911d2d32c6a3f8cbf544751d4cd1d6eeb8696c4a0e99"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "de506b308bac4f163f6ac8ae00a5e59894a0b663359587d26b732b3991c46aa2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "681767519f2c57ed5b7c340c08e546a10bed1ee79350528903e0fca76bee6e16"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "edb0423a95bfae7c06a2a962bfeaa2ad32e290d228b4da4259f02be2ea927245"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "7df2fcec243e8ecd845d914deb9d02e18816e25e98d35578878dc797658f53cb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "60766554490c539367cd3b6293fdb74910d1547742781fdd3391553389fad635"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "c18cbfa1f45531be2fbeaade8c44b2fa136437b295094349927a0a665baee0af"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "48d96428d1023e07b21168a4905bcc94ab2a805a873840d84b0ac8ba0521eacb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "6c2bb8f79ad31941fe48e2d8ba877009908a8466e0826b9c76f00e3e00cc3030"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "9acda43cbd6cfd4abbecf8d3f56b16276aaaa022f5a523589a136cd872ea40f6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "7a47938c99a7d580b78730cc2881ef75ef744e62e00f99f8f650d5ad23a116ad"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "3dd1495bbf55e03c5fa2e80e908792c238b1c42489719fee811606784edcd1d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "39b0387e1107e662d7cac9a0748797b3784c660a4fa63de02b6120ddf29eb22e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "d888ee30eb602f0a90ab3cfd48cb2d56db27b142d23f8e56ff687b83caeb3358"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "8ce797746187f70e8f66a9350781b1bfeca229308a6cfaa855d8938f2a04eed8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "20b68ebb7a8f3f792163366b3cc36d484e481f78f12fa59bdd558f9b261e9165"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "4a2cb5d8e144c6a5d940d42c1a7f94369a7f1d2e998f35b525af26460f6e6dff"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "fec4f7752071440bf5b20300ace11abdba73c9738f270b3e00abe38eb758aaf4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "78cedb18cd173140fc853bb69bd311cb99ab778cf38e098a45459eecc2996a41"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "db48c4be181a2cd07d56b266db5c15816c5a13969ecf2f3674f074430f372e11"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "594ef21baec4893fabf6ce8342619af12b0a1b26001d75572072157b595795a1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "7cd85af89575e4772b5913a9a3be76f2d80856680f8fada9c6c516a6bced5a38"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "e712b526098586dcd629400345aec0e98330042e183758c29348328189502c90"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "df8c8b4348e953c08241648e74c5cc95493c9e9864255a7d5e56cda49ec0b1bc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "915c9b5fb2fb08db8542889aee7cd83e74c3a980fd047799f1e1a04eea6493d8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "23d6b1d6492fc45542e354761ff3e9ba843d6945d99b2655323863f35cdf1724"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "e3447184746088f9a017bb9f42cb71f73643f8db36bda56e2b5e9c1e21ddabd3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "a0ffd72b4b48a10987a83cce4a522d33bce5f84b99526b0268b5a7badff385dc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "5f896406a9af5a28b24d3e38169dcc3ec0d282193c356905b76f832b5341afc6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "19f5d2fe48558f6eff0f60066ad621333f8dcdaee847a198eca5a193070eb125"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "2fcd3bc133685288ad0f777de634c4d79835dc39c21802cbc711b29bd8c6c9e9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "f186f51efba1c3b091ddc6d646ded278220c27b7af343d9d1418a1d2b2116c2d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c48653649e83756f15aad32f890b6b0ddcde34106387ee1165b69ae84cf71c94"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "537b0ae140e06da5a5001ca775f3646d971247a44abe5da18b4ace0ca8266d02"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "f935b70dfed6def101baeae6e596946615a8cb1d8ac61a3e6921633d0fe9408f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "5d54f69b89b1283e34bfcc6b9e32e9a7acada65e193e7d81b871199feed46e2b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "673a282abbbe6dcf2ddcf6a41ddfa03a75e476a85edc8f017157ec85cb828b23"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "8d2d57b9c769708fc682ee4e5fb50e7f4e40f3240b77cfe15afe8017b53261db"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "d4e4971f653dd60458fe84b492e24d5456098a8530f847220065da2d87df5af8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "448b8f6c03edf356a2723d9818ebb870fb711c25c256809a9ba92ddb2af2a376"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "140466158bbb922be934b99d4cf804b46ba652984167e2dce3a44dbdfff035f4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "2da6476b0ab39c1864f5059351da3e01d172cd4723fa8613f11de0feb0672dd9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "34a11635cf6def823987eb5e338e98ef5e1942f1452c131419174975555854e6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "0e90f6751806ef2653e225cd02f7175abe3cb40c1037eba99b091bd42b3020ff"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "ae58bec1beeab7e6366a2223f35d977efab596d2d6f5476acfbb0ef16f5be9b0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "acc3fdefa1f361ec092106f532bc522c9c8f5af2457d9dd7a0075bde2c92b4ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "6736a63e7cad3d65d48a3bce5e95bf5c877ce9d3a822d38c5150bd5cf2f3b8ba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "6099f2b0c5dbb1d2215e0df0676b327856953afbee8537dd04190fca50dcd7bc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "c508d0c13a28c214841aacf270a9e262d2a144ad955d374c1f966bd4cd99340a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "cb94df473a2bdc69ac89cb1bd7ff8315e013ab943706fb15b882c6fae8b1ef13"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "4f7263bfea4eb8315aba663ee12ade27adcbc98c960b5c1ede14f0d6350f118a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "47d73ef268e4311655ba7fe2686bc876487a0480cd2f1495ca6874f003ccc98e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "94985e723d69ffeb6550714220b3a9d9750872f5a46f4c2d9c469b01d1caf8a3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "c8937061c93f6c54f8c1a92f20126348bf847549ffa85a81bd898db0e135886a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "ab42bb7efe6638efeee77d337066eb615a0e12f84c77b3e3c648df7d1a63db7a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "78c9ec224c43be4ebcf482e42691e58f102ca6b394e42ee1eeb3b0ea367add7a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "c7c72b4f5032c545d8570734e0b964bf080b824ab86b89760ad4ad0b4ad74370"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "b6b5dd82901b9012264b5ac5169ec0f5298e02abbe35b316048ac2570cc9441b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "2ff7ca8be88c587aba6239cbba4092c7fdcd5557ad9a8bddc11153d98632f233"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "b6bb3f074653716988ef0e5dda4f7925e1756e63e98350a58b61c5299989e56f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "93a17ede83cbe455ebde61ec500876d57718a1de3830a0e6918b7281e77bb7a3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "2bc186c9783ca37df51dc7910b06c155f55025728fd813711f23a5bc5dd01381"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "ec4866612cc726c614dc7626f7b418d0fa79bd06276e5e318c915d944fdba9aa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "0443c99ebd0963e37f38d815708d432b22063ab0c2d9ff5d6f9f280f762e849d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "c69f85348ec05930d48daf187ccdc7a62ff99eb088bc40de95134cfc8f11e959"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4ca86057ebd44e2190974ef8df30de3d547cfc041c2e63c40e404c7d976a2ee8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "8dd09fa251bd9ef7c67dbf39c2fe2ace14b9de42b626c4ecf59faf7b77f498eb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "69d5cd74351da498d825c3db9f6ea5ece7c7c95ed8c0cb00f3ddb66617b551c5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "c9c71ce36ce4d93497f551d2ec3d00e8fa4ac910aa9b6aa1725e22311be67d07"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "498b2f444d54546832e7fa32400d14ab3bffcf34e1d6f9e26286f6295a51d2a2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "7fae92a0bd6fe79cb9188e4e5a09b5e1cd2b90288cbf855ea49798e5a3aaf00c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f2392dab7ac1e5495127e5dbc7ccba62c34efa26d7c46499400aee8fffb38bfa"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "c3fb1d92ac816eeadf7d145201a3833788957f0c6b30333d81902caf399f7295"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "f6c6eb7771366f79711a1d2de98e55908cbc09ddb54c5e231a0d948bc7158fe0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "d866984bbfa60ab1678775d9e815dd15a593b6834b293e8169ee6fa2be8d2379"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "1740bc674db4f675a1570fd7e192ab5e8f901c93774d569360da9cdc58cadafb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "dd6edc1f7c979798b07394b6ce8f1d770afbca8880dfc8f620e3af27a8bf55c6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "d9ccb4b57c41f6cf19939d0678a11c7108562ffb76f950a20389a34ebecd2893"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "85a64cfd5d22fbed5efccc6164d43af9c44dbf60e13f98031ddcc8f56cca21b3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "426568d9d693de4407eba5de101074e4a5d539ccedfe5bc506d17d3c44d85702"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "628b85372173d90398a9f226f5f6703790d3eec27f399a1ce5a2ff7beb34267d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "8bd8620e5edc23324ce8792cdfcbce749a496d83cbc2f984153f847662dc4a7f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "f426b03158907034077170155a56f6a4ea38acf11b3b21666285fc89d0a4fc89"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "3e6a92e82b7af818c72980b4c8c8d11a4a350ce3095c389c1a6ed47cff16b692"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "58bd88b77bea426029244c599cf01d949dead68b10fcce303f69ef3e674f3caf"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "9a71f0ed31964ddf039486da29ab2466532a346b4a5239adee016c67bd0c6d15"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "99003cb8ebd7e66871db8cfe405b7de3a43da5ed2a5096a1b2b1e02746c90242"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "4e52e72e0269c43e478d3f57d4ef907747a796c4e97afc2e1f95f46b53e6c445"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "9a6f430fa49060d4d310fe35bb0b2021764588d488a3a1e3748e11692111cb66"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "3922ce0241a72cda3ce3d4160d6b6db571de24db884f345cf33aafb6762e39a0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "1a67578c94eacbd5b96df231043c3307dbbd575b227507552abb08b7be742d48"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "9cb780af05563f23539ec78ef592130108162d7034f01d471cfbf61e6b70c6ee"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "d24a92b4337f8711f6ff70624f63dc9ae57eced7172ae1aca3fc36c6f91e8cdc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "8619579a3bc72f51997a0bf8872ae5a408729b557fcb99cf9c75613c2761de5e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "7ac85b15ab8f91fe57c3f98640c2bdc28e7fb8a259d5ae39c96914de19ab37a7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "973b74a250fe5d6b1b8a2b4e44557a6e3f691ac070337e7a60de8a1c6d5a583f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "64efe35964f13ba831d851bfd93ec7311112e7e6feaf852a0c27ae57a2a72c22"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "65dfb6ca51502ea42ca9879c10a5505f92ac70a1d62fa6d2c99f081c5a3dd2e8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "5a22b81a92b709cf9a35f995a7c2bb9c3c4bae6724f4d7bbf7cf73bee740b614"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "6ff84a7ff3446df3241cbcdf59ffaa4eb25b30c428fde1297b6b8de1c96018e2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "425f0eec119d32b45e4fe79dce6a0d225227a05f983a176c54f243ea36d86c21"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "6a9a5b692c2d673c6b420e2649b79a1b2434967c3a80a2c821ab323f1b73852a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "7e8d2c8bc8075b14ce328d48594b192c43a6f1a6f1a9001d564b8b18613461f6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "5f6b234bf1583c117a7df37c00e4f6572b2a5476c27c3a948f2bded1971a6be2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "26fc89c2d7f0068a03a155bc4ef867666e8b00b7ca7a2634e43a447a2fd64d4f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "510c7b766a40afcfb4530376c9d087730bdb1abfa279d9eb40d6d86a1c91b2a3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "98747ab0055f761b8a5ac29d3b219bb01afaec894d6adbb8bcc9dba1af8cc700"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "e4ca3cc69142747cc01a9ac8fc4611764e478cc7dbe06390224c53d3e4eca566"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "53590a9c0da67062d12b4e9b6ae8775fa8b4d78ed7ea744061acefc31588ab58"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "dfd97d28fc76b93f426145b1572a9bf15d8621f46f85347fd08bcf83af1d7c9f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "3ff8f66250f47b096cb5da63f632a606739d08a63016256c17a0f80b189ed714"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "2f221f4b685f8baa003cc4d5bce1c15699ea277c571c91638a7d740e46ee04fe"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "c5ec20aa0372e2d2d1dd51ed8388d1fa97d194eea7b7d126e8fc03a0b96c25ef"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "8d124afae32268a9babdebc0d3e4fa8a4989f34b8b350c5c39eba07af54c20e2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "ea5c919d513198e975bb111d9bebb43be0b9841461498ebc91dd89856fcf57c8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "553b1e980d57fa05e43f4b6a778831dd2310d1e23c5c84d09f127f8a46cc0b10"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "86c00185fc5a494de53c6f57119b2b6f80bdffd24daeae6930cbf2113c0f564a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "c660b241bd106d0138136be51ac92108ec95a5a4f4dfac6be0bdc04ef01d8920"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "d5eb7605397d95f1e663fad25e7bc1f310e10be1ebd8ec8a45efb6e071892226"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "e7e7642caac387bf234eb987fae7769c4f77a9d603bbf339924221dbe38cfa5f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "dd729e7ca9cc5c6abb20f1517e6397dae364f36178f921214f6af8c9e2cf8130"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "bf6a5cf75a0d9bfe8857834418cc10db5c454f57561cf157662b7e400e958406"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100Kernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "087ca741d2a820686ce1ab99ebffd627b190c74084397227d59290431e691d89"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "11f639fbfb360c441c905ebe6a558cd999bb7f9f82c5a603716088486faa520c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "6d5e183c93428798968ea0d5ae34562a73ff294b93b3e178212749c00645569e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "b21bbb48c335e58050c832c9af2904016288834e924e137db7d203d39296fe86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "f1702f49a29eb7150390762443083065c5bf49247e1ca4d8c69c6b215cb58893"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "d69e962a279b75452c7c455a29765bf410a8637b98adb48ba51572c17ac38c37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "cfc7f8eda999f1680b36f3e2484d57e532be4c73c6f5252ed1ac1fe958c4d10b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "b6de5e67f4521ce468d9a1d4891445d6f61a0edd0632a206a96b089eea83b300"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "c865746ff3ad97b39aa1351a100ae11a4854bdce6fa12b7be20f0678545fc3ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "09c3c27d43914785ad4de3889f80ff4d74e73a0f3c9e262d784edcbb5d4286ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "7d6daa70de4dfb4f633dd87a46f2dc6c87e16bbb73e9815afd78d18c48907ca3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "cd94e0cea220d5cb800933ecfd3fc55fb33f492c9809c65d01e64c648a8fe164"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7db4ed6abf8d99c72c2279b36d3205a174e2eea1355b91b43b72fed23682568e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "be80c25dd1daf54e676830c7828c5d81d678bc6388ee2e7d163402ee99d27842"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a22555a07f292f51dec607d2fec8283520a1c6c96a6616813ef85ec103f2bff3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "17e3ef2282d710fd0b24353a6a9013d171b6a6f53dd711efc1c6bda868e7f1be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "832957230ce6fab1e373c58b60c58639b08b45a3d42a1ebce74c8f79ff5c01c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "62ba458163f2e6c65ca7588208bff371ba1a29a552ccfb275bbd54d8080fc3c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "d15b03fe884e4d62e13b6303ebce1666e06f8988f453c1ccdc2a79fad7cee285"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "b01b02f3cd916edd9ea746ed10bb738f65701efefca16483f9419a4b24f0a9ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "a61cbf3d8332ea67fdb33d5d3b0f09eb865df834e6b6de25177a6e0131026b37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ff91bef19d317261aa5bc3763be2ae839788f281fc8b358ced4ead4f1419016b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "8c8141ae0c55445894f40aa60d71643e677dfba1bf2645523430d3a6207c8e20"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1bd41ef888f33b1ff679766089735be8c3f3b620a23a76033af91e023aa2357c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "72238320d9bab55a838d26527108a36da63e07b7aa31d7098a3cf18f57ad5a9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "310c57267b95f0f36a6b83d185097dcc779bc2e02fc81ae5355bbb7b5ea5610f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "5f184e1ca8da4266b2bbf974f741b5581e98f60fcf32424a27f637444d5eb644"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "0ef9f085ae00d49fb810d798e13bed7eba3a32fc810f4ea82ed24c3439c59c6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "9e315163926abdb3a26428e7a4d7cb6e589d01f9b73162fb93dcf4eda171c9bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "ce1fdea189985355a8473da9bff600ae93d24b4f114c0f3bb78f142f5550b24e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "98108f15baa6341bc18aedc6a1793efd3028714cc334589d1ffa3fc8aafc3d84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "9048c98f7ffcbbc470188cd07da8d057d0c2bed50c63670a8dcb625fcd66bc33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "d0b6fdaa093cd25aff6c56e712e0af059be942d710c5cb7c3885502c1d9b3302"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "bb7c6b7b27c899fa42d796dfdba4b45376f8fec9358721e23790a7c3c90b382f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "1f5498c17a9c739ca78685f82cdf12221002fd4df15a904699d68add7795c33d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "34de19eec7c895a03a811fda5fba753ad038d83559198f892ceb770acbea056b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "2473174e0a48fe8cbe7916b0c96a5c17fb60b7802dcb1b3ba0b33e8be5e9b01f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "779be73eb0766640a6e2f78af2a6a81752463fc68f8fbb23b4d26d7f976f3e15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "31cfabfcc3e9119e2b69c4fa5756e7e7b57c869123d1ee494d4a702869ce8429"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "eccfc3926e89704ee6463734017525e568e09c487899ad1f81aef560d18ada7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "f35ac4e3f12ce718dd85bfdc9de5d17e612b56e0bf2699ab1f251b8ea6f1d510"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "520611867f16670dd9886da8fedb91d677e809d40c0e1432dcda664ae9bd0a0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "00345f32dd22daec7af8ddee8aa52b4847aa14ed08d3128d7357614223a5c742"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "3dff99c7d171e066c0701b237a2abb01725ac7f9e2ea7c07be08e304e9bff34e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "f90633596535d71de0e2c06b2f5ac80074b2026bba13cfb4c00bec87eafd06f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "294ff8f1d56fec12ee798b8b3f792e7a321f6820057d6bcbf9c99bbd792a3b1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "a9cbe53935d51c8fd109af2791b3cf5ce8894f2854b2bc5656722bd560175102"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "a922399fbf52741c19837363949a372c8b2dfc5c092b5ae8aac3e09102c53be3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "42763c549f118a289204607cb265b6dc2c8ac5090a76155a9369d1d099aaca37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "a96b745fa2d63a893cdf8be1d0fbb480f6b55733177789d99f31b4c5f410e174"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "7cd766c458b069758723203fc17ac9386d4c11af185a2ca2d0fdce8a826189af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "be056e54ddf91840683a10bf31c3d23ec84dd0569f953113e371c9a2e5c9cf31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "ceee41ac8f1dd75958788f57be380a5b198d20a286ac577a5e7fa7ed30439a8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "8996eeb44bce5a5e97d46b09306448ea0f31c771179aca12c568d2a99ddff8b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "8680c558853f33388b51c099b473a1be864b65ecaae9828fe92e8f1b1a9c843a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "6b7fefe325ea5ba806a539e77a340c47831efaf7459216a7a3bcba1e82876061"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "227eff4d24948b4b96edce46119662e136f606946aec42980319401163710805"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "9ec2fe5b2c6257618eca812fa4f4b9e40e22f9a50192695b6cfeea37376e4ac3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "df2a361f18f6a9e5fc8bd46f5548c4ba5ec188b0f2d27b38a8b739f8351d35be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "078514f72323e91cb5e0746a9adfd9bc64c4a0b689ed9c9ec9c1497d43e78cab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "70d55cbfeb9ddcd98fb09ea8563d1366178da5f743061509285ec9a88ac0d1fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "00e944e1b003350867e95c5083c4bbd6fa27b1ce985d53a9a40223feb6351ba7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "65005f34da2218f074b582382d11f48cfbb7bf325e8edaa674400c8e5bc710d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "4a23f455d5cd9ea3e647b4951b86539725c5f91f8f4272c3f58cf6cb707cc054"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "fdf3c88da02e48c8d087cb6f7543593258377448be9d2889809035a5759ccad7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "537f62f9cfa2ab26ff4a4365da08acb09e6fd2ddec604d9c41673a5944bd03d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "182a4c175ae2bac4157df5a0455f4157bfdf9790912a3000f82d689888424db2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "4daa96a965380c7b9b0bd6ea9e00654454c72a85e52d4ae4521780b81884b841"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "5ac34dc6e2bb889c56bd032429e7e4a1f570d467f90e07d7b1f97870e1dda7fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "c75aa14de2b6e0e1b90b75069b7180b94d2e835aaa61c7d5c2b9a4b6195d0275"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "3ce02afadd4d254baf668d8fc74d70b0b53f391b802551642bbe9e4e846a78dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "4771c43ceddd716fe20f201f125d423a43105f3b7e06729a331c24a91dc98c51"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "9e2f9de4db752d7bd1e82735ccb3ddf5d8f3c2d43b9a2902ff23eaa3a0b8e490"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "70a2ebd2d060d0b41f548e9edae4e2678429536b41b2a569f05c5130fde9c308"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "9822eee045be1bc1cb00b805fee5f826aff0c7145cc9813a1d60215542865ccb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "12ac8c196877b3d868811ae816e37bc77e793c481eda711f6a9dbe0ceffc7c25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "22b8cc062d7c4d1122aeb3d9306c07cd988803963c84faf6d6b18bb557ba2008"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "dc3df05cc1b1628ff3e9d2a41e7d031fbb5392993d247697c88d6f0c88bd748e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "e53e14d6396a37d39f4fde697710973eed6545bd58ee812d984d9f46caa2c7cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "2b3258c83251d55ac1ae20346dff454687d067d23ecde2a6f7f3b9721f0a9797"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "2525b98a72b277881a1d07ae9cc8a4bc2748d6d3da4620750994295c5e0317ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "41887a3fea209d20352868d70f9523528838636e0d24ba113571023cdbc6870f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "14176d8aab3db619887a86b31674b48eade772a315f04554d004818326262938"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "71970fb427748cbd3b2b31f40b508b265f3bb469b86f0a5e9293a2861a3df632"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "46c53a1811c489f2acdda5b25d52537c3e0f05b925d1e03266cfa37f0775333b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "583ffec1b8cdc3d753cecd9a2ada534dc04ca9e286c1e812bd8065a922db9aa9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "af2eb595abe872ad15d8d9136106e64482813ac590610843ca154ff67f9ef78c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "baabf3fa979f6eadc0b8ac6c33ee0cf27b6e649c662ec5a1b0d48769d5bb95b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "799202485e80b6ef1076448acf59b3ec6409f8a8f3178dcf9216377bf00fa6e7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "3c5d4fbcb01aaed2432379cfce117410fd0139b03fe80c3fcf21b9a3138f7886"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "c5b0321ecee1760f430cb33b5ad5427f0e32935c37e6c91ae955d07b69fba9ab"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "1374631f3fbe288cb927440ab090b931538774485f26810ab2af6090d973530e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "2ca326575a437bcaa240b75c390a7f5f906fd33a151ab46085d968c4506572a8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "7812d19eaf214ade679a3d40e96e2c86db6700af792331d002cdb136c0b0b485"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "68dffa8e33587bb80695bae51e20949abc1fe5b0fb78e909b2fb5849ae2d1d5c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f28ca24d56fecb1b3e8eed1404825e0f9fd4e20a33661d583b27eae182dcf2c1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "d3f92651ae70d6af8c95c25311c455ce371eddee2ed682ac99dad8425b991a2a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "5f885e0c85b301e48b2adabf919feed8d5b55f220442be7a57e982e3caea192d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "a7efd172047303402202702014b89fccc89541741f4464080a70e68ff2dc74fb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "7ccaa9787b885f67c67b51cbb1865f7901c94137dcc69f27c9be17dfbc5b1e95"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "4b8b2a656e12736caea48481bdd46c2e99ef725c5fa5e1642e97329c5fd8900b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "a16bb734a41532c3679abd2a62c515b84946ce7bbc066f76f15c66278d1fed5a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "a4a318f0dcdf4a86cb2835b6c77db0afcc3cbf8448ae728e2f2274e588dbed29"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "9b82c58ea34fc40c82937986cca9b0021dd4035f9caea139d7990284e3c5dba2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "4f645a9604bbce13effbdcfb62ae210eec7c36d893e7aeef224b9a7336d15c61"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "7d669d63d4b781d5b6fca82fb0001006abde000ce52617af89f524f7988384e3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "c8d4221a6387dcee6c01965383278b305cf80a54ef5826294a2dafaa1acbf322"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "221714b561a6f8929a3e8ce9de579030eefb00b8df7adc1fd4c78a485ab0aaea"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "026b5aced9d13386ca11e804022a9d4f050725f1b5b4095d2cf869e1f7bde00e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "210c6160fcb692270c61ea8d2fc555ce78afb8470e4f3e105015f384cff93012"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "b59349fd1ae4e0f45dcc5f73be51f17448be5e687692d73de323d8d96d984fba"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "5f1fc56459ab8ada38bf800579f2c034af979532357ac0ab5675fafa8bd0252b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "ed4f5f79ea88cca0a0dc8c25868895719787f5cb247b7612150012d7f514df61"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "86d25b7efd40bbec930313ec802973a3d61ef0e6e0b6e312367cfd6d86cd27b9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "71fce67208aa1461b8dc82ea9d9c5328195c88bd37bafa63ce6078d4b66bbed3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "3ec63f3d2b2ea47c718405fc9bab22b8b94335f2190e501edc2456b357efdb80"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "f88246800334f1b8857e90bc0698027b2fae54f3502d96ccb817a11f931d2c89"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "ccb954c4c8524abe18193d04d1ed0d49c6d9b10d5162f89a9307a4628ce5b461"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "9955247ec104c09043031cfa4e88a051145a0a10eaa4812ebad2fbf67965bd5c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "4fff5886a92bb54495db7beb213aa276d719f805c429582143d89c2745354bcc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "3a2bcfadc1f6054b2eb94e92a6d185a8c4f5835bd4c220b3f5e95f232c1952cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "506924e3e110b85a4662d685eb48c60e202f7ad17d1be6def55e4e9d381fb866"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "2c4fddafa022d8324d6ce4472407da9bfac93327347a3541e642e19c3b5ca0ff"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "84d2e153f354ad8a7668cd219f6775611b1ecab0261bc2627520848db1d3cbaa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "4380d88d18c5d4cbff2d5b36341b02340f0ceee2fb1d863e3ee8f1ecc054959c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "4372434fdda8620f851f0cdb6bce90fab6ee470750d045432cc12fcb40d90b1c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "af73599e083de0904f808563a77927c101125d8aebec52936ce603bcd4e1b3b0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "50102df96a4cb32a300f5798d20002df77e1158293473f24128a71c4dd4dbb9f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "571fbc45474a6e9b0f8877962b184f871402a22cf57b8127edb11b0a5b7f34f0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "dac5d199957b2c8d1644108f9861e706738fa92dc7ee104397fdf2001d14d0b6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "af7fbf1c439f9deeb285519591f85a3f2d3a28c3094512ea69cdb5e7f1c24fc0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "ec271e600a70e3aba3f1740470ce63f082665b1a94de90751c4fa25c60e9f9f1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "eabb934cdea60ed8f0ee535b78fefe7b6b8371af2d8c5bba7e1e01d0bf99a98a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "43578e096f7d17762173978bb810c3dc526bb9255c80d03f9abee1acf2f235e3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "670c07caa0287ef554b92ee5ac9f1324783a2bd6eac5a8cba554a4e8dbace7f3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "b4ac968b31f5c240902214641c8cbd617f426791639dd3a2173a5c494d11aceb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "a38b106b085651fd83069b3a49fdf14f62869429b0669c4456cfcc6451bdbb84"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "aefa0b3a4972fc68e5eeeaa77b2f75a9e190ae430788785f119f7245881b0212"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "b3604d7d4c50ce29d8d25b85c2aa4c88ba3ccd9586d23354e9bd457edd1f2c9e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "e4c73ddce8595a43c686148b4c610c4c8eba51e2d534b8c7230b093de606be4f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "d1336c9e635cce6e80a67c17c045d4028f9f7b1141e5370ba03d1e9b11f42b63"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "fd1189ec2effb5fdee1e217c803ca05aa120efbe3df484cd7947a25587a5ea50"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "146b2366206217cab1417798295f7fb0c434ec9e1d59d69acb3d088dd05d5416"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "d9e5fae2fa6c1fa4dd49a8fbc481ba28b5743e5c361d072eab1da899842b8032"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "081e4dac6e7863c8c9f935ede928f205dbfe6d30a9ddffa3d7d8e1a91edda15a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "90ed9919e284b77ccb44878626410d01a35b33770949f6b95cbed1f630d28348"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "f24c3b0a08f50c9987eea8a22e4ec89642773d8b76e1d9a069a34f51dd7a9e5b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "997c6fc4ef7d7fd8bad74fd3f598beadd2626615dab79cea98d6f286a7261c51"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "34bf2fae3a7896336498dcc9b6ce36be21c656a3194f423b088dc05ee0ca73b0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "7f5408cf3394a47737193e290501b1d8aa03c6953979af18ab6b173a96641d37"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "3f9fe8e966d965f845c221475925e81ff0562dd70875a91951c95d52e50251eb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "576048ba6127d598295c2efd451403f9535489b8811e94580f66a5af08dc263d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "6537a8021a189b5d973a91a99c0f685e57e2e98a8d89e4ad8a375a5f14ce2701"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "b77149372bdf532c8a4cfa77b84f5ed37fb0aeca50b8055af3ada3b525aa7fa0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "e2347ee13a593cc4047883a306bb6b151e27fe31a5d1e9885e6ee8c8e265cc6d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "42efe3a0d8099a3810cd2207b19db85486bed478c037ac8867e7d753c6f21a0b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "27edbb48a807c6658251c2f1b73df1303bc17d4e79cff053fbc5d8c47ba5ad8b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "94e5747995e18f516d380252af258675f7d980b2183bfac9687b8c3e42d82e10"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "5ee6d772aac75f939c45ee871d1f39fe5cfaf1b28730e0395006964bdaf9454c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "a9d6875edf2c4bdef8c5216ec4a7d2a23e2c9ed9a3c158d9abb8591fb53ee96c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "7c85021116abcebfb6332264dd327f98a1878b736b85f79e484a89035cde691e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "80ae8523ecd575f6c234eda088854b109d2d47af7c2b1241130177b3fffe4374"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "d0309510d5bcb3f12ca71e29fea1abec686daabd3b89f420995514611a7d56e0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "613572f5d769e0eee0802432cf4d7d25284a2a7305cada9845efa2e9142ffffe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "4c93e1fe351167dedc127fcd5a9cb1da48a5d0fffc2f58f508caf658ec879d67"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "6c7226a9f5009de15d871ad53f1d1aa15935be2699981fc6328c9f6d255f2eb6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "7c8532441621856e9e9ea5411e44480b16916f2c83e8c415e92d2a61981ce503"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "7e8799275b6fb8d9baab72cec39ca311c1d836c53e76e84c75431df5c4e3221e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "920c1f7c850e9e8d139fc526944cdea1ad5f0d01a6add5b6f3ad134ee9bb144b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "8fe53da81f02751a32dc990f576e770f4cb4f2769594044e1013a418e611371c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "c32ecc536dc077dd75464b26344a26becc6d1ff4c4ee081d13323db1b48440d4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "83d64ec76ab010dcf2f31f776a92b43ea195fbe158373cffe3bd5a98beddc469"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "431b48ad3c159f9246604c2d50ed6b9d1644c163a9a772e8f6e20a06015a88c0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "c097e3ba83db31846a29b23614a4c7ae4472b845522b56ec6c01ec07c164145d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "568b983f64148bd62643e34ddb13766982312d8131f235edac8a5603cd0bfc73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "ce79a3c73d43853f3c6432b5b4a8ee4115fa5966f1258376a114b572d4e49d68"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "4f5b5b13d64cc79dcada09720b28d85bbd7c03f5fcecef5782c1adbde737e449"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "122acf9fe96ad0bd711169f341fcccad39a38e9fb4d8cb21441e735b18a053b7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "7346106f8506718cec5d2b14a90c3e362c8a7093d7c9edf747e920c61b1ff257"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "39d0466e727bf156694d5ac4b95b9b4db893f7008043d279d1e7a02b95f5f194"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "d15063f125e5066692426cc9e221c21bb62e9e7043270587856c012538d32ee3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "fc9df14861711bf8cb1eaa1a6cf0d709fd772845a7e877f099d00bcbfcac9e9c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "0417901fc80b508fa9746c474405740d09da9af8e96197868c09af72326834ac"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "d73a27586112d1802bd883984867849b44574c58b7151580b6c72b3953f2f60e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "bd8aa08e278d29f0eed1fe34afc55a6979559752ff73b9a05ed917669a3f8ce0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "8c30562418f949dc9c27e54312f9ab9927cda861830e12a564c919cb069f9462"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "48ff9e43848233b78e701143638eec5141db24a870ce5fa7095f29191c380212"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "c84c691a60e77c3d7c0c97c0e4754c5f5c93f6bd24d373f8dfc381091aa96fae"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "afbc5ac30ba498c4257875b2c9c2ef316cd04fa9d79f44fa38cb29260963f655"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "bf56721856323339d8bed47714afad593902cabb2c7b8cad58088f31c10e2390"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "4c13abbf1cb7f7effe2febd3d2c91cb29ba29e243a514a9c910d71ddebce45cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "19e4aae6ddecf22e5139663919a560a7f9bc81ab02c46fe0116cd79f46497915"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "0909697aec1f9a58072113de465538e70e5475ac5f41685166e071d3376fe383"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "696b00ae7805262ca5323c812aeab2cd529a2d5cf705ed63bc226e588dfdfba4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "c0c1b8d2be58d7ae2613574aab4a510f2a71bfa4cbc0fd6c0083a398f3f287aa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "fcff4b5b3ab47406677b41cb218d4b55156c5236b80563ac5cc1e940713a6339"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "8e3b0dbb533452fe4bb8f45022783bc15419d605c489ee1646bfdb5914969845"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "2978946563698648f4fffa293b4666455fad5e9168999fad2a07a2a372c8ab96"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "5c065214b512ec86b15db024396666013d86db4e085b401292254b29c013ef2f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "973efde30dc19094a2d2b3b7f13d334a8bcad759606c76a5701c7b70f25b11d0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "9a07a18ed9f3b7840298e687a82fcf6d46fc4d445b7c1f27769906698adfac8a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "94a6612872ff20a1d866af7f83497024311cda74b009ebc55840df918add4165"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "8c1da5caebf65f4ffe02a9a22d2dd3e631fe913450f4aa6e7ea95a4c493ccf1a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "8d1179f08b3f0531bb984498c9caa02a27a923dd36b9fcbf4432655ff06ae71c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "5ffec9803b1d997738c91d787dce55b3f39bcabbe893267754538ed874e3c0de"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "4f735231444f77ed6f50a41f590d43c458c5e74aa99128833210191951ec8c45"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "ddfe85029bfdb4d3f6a2436f06c6ddfeb3ae9971c890cec31c0b9c5e15141e36"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "a80e5509bf821eb907688fdd8a35d21b4a10808e1a5568a5c453eebaa787ae43"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "4b2ef0c7e1513f8cf856391396946597dde26e6d2da7a701a4fa9fa844a202a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "396886d4b9723b860748dbfc6d7950c4df725140b30d603e758b72c6de148978"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "9e38dc43caf9a08dd65bfce1aeba7727bb5324abeace5231d9affefdc99f2f71"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "09a4c7a48c87d4f54759b0fab9be8062a459c11ba0b2c87d9a4e4993302fec67"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "a1070a640c361c1668626e2d0c247bf02d22a0528a246b7cf8b031ea8af686b7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "71079b6f1e4d6e63f54b8949d04ac698ca9b4477b3f44dc000e2d66a50ae60ba"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "11c055895f2f4c9dbdc110bd3b5a0d66a358c3c02520f7a3ec6b82cfd372dd94"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "038408c0b6087cd72733ac78f98285dab9c64cc47369e5adcebab05feb00553f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "a14ce9b5e53284998e5b76a35307b2f5a81a5ce436d874626aa282191a284b06"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "7e236375d3065455d37cbf8efa6c47fbd60b734eb1533aa650351e230e5bdd63"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "f27ef0227b7d7ba00dec908d9d8b6135eaea32ee83b49c73857b1a78cdaf67fe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "3666010ed4943cb377180efbf2831be3d7c04c321acfbc4b0311e45ab29e2065"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "0926f5af8f3cb081e4ccfc86ba6054043a7e4406f45347b127caccce7282ddd5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "a719f62fb47b44ed01e633667094e51be481112d1c7afd401fe2c2010e3a4338"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "59bcde3ad6c7fbc6cc0899731defb309f8f2d8f4934306032828753c3748d9ef"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "01ff4f1f8e28d068cd4e631588697d0c2e2f466218b41a6cb252aa95b53257dd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a9cacbe517fd7a625bc961d12e7f7a413e9b6c91f78f7e89519022dcd80f772d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "53d7eab4a3522eea9a62cb57735b9dbba5119b96e80da52dee29af6e32931945"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "5ea58ef063b35b849f314ed9a1344e130423aeb7e705c1e12d9dcb4fc450c4e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "74899368e47fead66a10e8a0080253ff4df9fe26b8b3c65e0c7263692d07eb9f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "fd5e9783f09659f1348fd6fe02c0e46419b6efebe24212e192d258f15c3b79da"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "8b2dcac4e78eb19545a10431297ea963201e010ed63e9ec47caae4cdcc83a40c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "e60235818743d3ffd403c853a7bc39ee2ec7878c1f3cc1c27a4be7e38543dec4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "48921ece4628f0d97723bd8d3433571559f20ad6c05c11a51cc5e9c22c1e275c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "28fddc8ea5cb283b39a94ecf1b13a019b3a30625eeb201561b8a199f2ec8ee80"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "edf73b16fa45596d05d4c7ec3b6091f6e322f400c34cbb27cfe912d23951561f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "cebb0d72d9374238df69692d14d384dd6304b80b0ad7d4958214aa52ab50c32a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "e50e65f1c469a74845524cb109085c162b9d81edc914899594a5f1df28100a49"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "98f45e82bb7341351c12fba917a56b4e8fd554342fafad3de452abbee7800ce1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "e4fc067e46bd2d47db18f8ae9b763833d9b21d10228e9327625634dc362ca5e8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "46212db2f27b21ffcd32b88388966396ec57bf6788d258f6605a34cf18b7182b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "be588122ae52245d0b84e463fddb290e8639573ff28af83b753b8d19c6d59e80"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "183f85d11fcb48a50d9c2551db84aa1eba1654666b6afb0f4f9980975f0311ca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "fb255a95cff4e309399dda2f5d28e383b5eafcde00865d927ecf282725312be8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "20ed2367595c9687b0bb07b8ec341a69aab028314e142d0d7ff35f9e805d1c65"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "ad706614662b4c4f863dae82aa8c7371a2f0b37f9f0ac660d55998f9c0298323"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "a2891c4ab2e1cf584343f6637f9f481d414aa8e6667e134c1e790af807f5aa16"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "a2ff74d965c4aac2a76572c26608ab3fa8423dea46b4835f8c1924c59524cc8c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "21d5a9cf1d631cf4733afc5f7e214069561bafa40eb21a8ec5634a6c729d6e03"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "88b8dd6d3cb6cefe6bb77f85d502e6b554bb43fac50c548b1346093c4ec73555"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "558f342f933ef4e9406278790bb3e810902cf4bcf7f9c424e8fe9e9b9d18ab44"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "7c772515c805b1af2923e1801adfc90997acdbf96f4f2eb9cba6be2f87365cf6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "d4d23408d57d586c7fa64648ebad58b6cb7e8a787d4bdea49b337445da63d19f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "d2f3ce40383a01a992caea7f29151363dbbdf4505a47ccc2f7a5628d755220ae"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "fb9e73456135010d85d2218468cbbcbae94ce0a63f969965839995cb1e1e565f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "d57cf7337389b39c37551c7a815016ca11cf02906b91348a507583bfa02053ee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "dae63f9c126a3c908ab241e2c52a0bfc9a3ba00f6a7eeae396b31f59263c3a2e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199952, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4865f47edc73958d6640ec78f00fb138cac038ca933e085324398eca8180a3a1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 199856, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "32d0633f22eca19243b93219a2c190fe41b964579bc8d2d985a8b8783a310ee9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199952, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "0fc6e697bebb4265829a839964c0d6da355dffb0b50aa82d603dcd6980db4852"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 199856, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "6aa0a2a16d33f8a1714a7c298e5031b7338af1f3ed9648cf129a7fdf777bc17a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "df0eb92552f2cbe6f24bfea7346359016810d7324847b337a74e04acfa1f6d44"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200752, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "ce4a74325ddbe86cecf408124a039f249c811c811e653f73cbb02823e6a1f161"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "2794cbf758120bf2fa928d1aff886ec1bf9ef5acbe9774feea80a2c672577c2a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200752, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "a2722105be0a0e6ee174258cb8faf50b2ffb1fee0326dd5dc131eca1a2b7a3c7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "a0623aeb7204e96c3a42fe3962e3fb238473a9a1197ac77854c3dabeed2a8678"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200752, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "2a168184f853e67d2f7130636d1a948f77dccf5a4e4efed14bb2293ebed52384"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200848, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "7fc296b676eb8e8fc80edd9bb01afce00b1e1d1e71ff81047f899ca69f1dbd09"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200752, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "dcdf32e41677a88d44db66376a5066b89ab0b8b15d86f98d8d0ca0cad0401e70"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 199952, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, "4cb6e823c03de77033b036d060fed2a5a8b6af7b6fb0a301fd8d1d602689b548"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 199856, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, "94c43349aba273827763674e4b326ba3a12bd9522cf0932a043566f6f6550cad"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 199952, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, "aec8df79730ac695842c351f39685728fcf787a26a02c9bc57179e3f8a5ea2fd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 199856, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, "97cf66c9f548e48226fe2f0f7d296b55ab55690cb6570edde61c5413ffb7cb0a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196816, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "349b82e983bfe2631f9ee8846b71de2beb932ca0b9aa5304e4fde264e75a7472"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 209104, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "8b4affd1979c6f4b5e00d196ae067bf2300a559caebfd670da330e0802b62139"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "d2164bd9e58b063a800247f47e58aceaa0b7f10b0d40c97ad6b22aa1156dfa41"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182992, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "baaaf351f41aa09636f68292a08957505c1f3619a5a7c0849777b91daa608231"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 197328, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "1c315f8241e19f33a6741b4fdbb8e1229c553c446a0e65507c1592c540444134"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "6a9f235c0368722fdde2810ec5bb2043dabc1798c8c4c67d6c2aa3db63cf6362"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "c0652dd6f8dce6faa0ffb059382e7cb33b54a043bb2f0e8cda39739232877041"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "da8206a1a9db4dea9c99bd3f54ef8e289253773b830f3bea5d8cf5b00bb862c5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "780b918f020bee59d0da0aff432a52df579bcc3603082f1caf8ee08831f87e9e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "c124226a9aba4005754e62eb7ecedc41a742e6f3a5b5f6d780ae1a3039952e41"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "442bdc736342c254a0a3ec9b800ff6374e2da248b4f72912b663aaff26642353"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c7c646e5b3e17d99827a4a1f815f18f39180141fef0d370b1107325da5c6ebca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "380d8f3bb4f12707f70ddc7640689e11f3f258e431944db3048d14f7b1bbf1ed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "4594fb340b981805a6774fb7a73f504fe6ce7786548f904c8d1f193e74e80775"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "8f09f7e6bdf8a94bc3e9628782649f7bcc68fca449a9dc7307a310417d2597ac"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "f24029290d299b0d59749b2d26313c0c99d972f9b93de70fbd60473c435e178b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "459e3a0b33d95ab1ef853a8298f963ee5d40eaaec87649e1ff168f8094a7eaa3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "cdf067965b55aced68dda1253f0edf0876e43120db83b1628676741fecbf6098"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "9ec866c580690bb0e48625a2e06456f096e62dccb7512fe47f65fbea003ec124"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "799219b6afbdf573264ccd7b56789b5106ceda8c66a466919268b521768cf4d2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196816, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "d66c66ff1bbba0ca7c38a4457f832db5dc85809cecae67e7d4ad8f062d49dff1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 209104, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "a20c887313a1ec2d09bda2f941171f5ce376aabf6c1d2bf14712434d99adfa86"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "c13d31b39b0b60828cc4c4ddc746aea8fa5fb96bde3357ff5d21479d46169c30"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182992, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "cca84097975015e788c4f5ceca0cccd4a34fdeedfbf1c6731114f82c1b6000cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 197328, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "dcaf97118dce03be6de8f2a1b2093c023b89ef3fb193f8525a6a024fc8829e74"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "d8c19434cf03618da0d9f379c5342f0cbbee66b6d2876e3f0bfaa19d330d0080"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "c08378ab001c09b1144815f13284fa0e873914c4399293adf520e367c027eaa5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "c81359170328eb579cb5b2613c29fda6da4270426f32e4abc64e4a58fc358fa7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "220f2ca1411dbf437464f722bd4d600df4398565b106c3742e1398a8d130e1df"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "1bdf15a267447e8bb81f8a40fd2cabb4b1da09ad4d55fab91f8a5cacbdab6260"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "dce7c21caff7c168c2bb8b966ae25fba5192476d86f0723d15c9c77ef32c5c73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "1fe58e807406289772d40326962adced4a13a7156416233c5889711540a46bab"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "6cd629c61e1dd888c669487c4f71eb259c0ec716671dff43eee798927a080443"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "7590a4328b0c51915bfe1e2db619a32b6c9cf49146b369ad254123cdc15f60d3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "f91ffe7ef073c6a4acfe6f0635c90eff159d7a2d36a9aa873f488aefd49e67d0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "873adb23313da4bacf4e48998aed33eefcad9528e1b9b8bc6d81ca15d21ac05d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1720527ee4e9dab1fad8bf95dcc112ec7aa58206f1f835bd0d8d3358165e6f1e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "8775b2f0a0db0edb07481fadba618c48a65fd1b80459950d52600b5cbe481b19"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "6a1b3901dfa72b2bca4364b77ceaadaba4d9a8abdf208737eb5a1777d6aeaefb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "e921a020bade3e16628c51da83d7b2128e723db7757e6f415c83cb2e00548204"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196304, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "6b3221aec2f0a03402ff625d1db04789064fe1dc20abdf5972b1bed33bb9183c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208592, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "260e9723468457291426889d55f0895637afdd019e7fb6ecc9a445d118c19965"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226472, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, "88b5d583ddd7d1bfbaf9fdf426d27791436fa9d2eb4f12b6db07c6894a8711b8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "71c2813018d6ed8073ea913bb023f7fae201363eefca6d8a069749086f4dfed0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "26dc26e488f02c336e29458816e64e177b8e616cf63e6b8d30d667fbe818c593"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196816, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "b97ddc085563661aa7ffd43f44479ed5f25f9a66d5fabac770958b91ceac23bf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "91d77e175202d16b32159d77fe2c25580d544bee250ca351cadbd6ce94c151a6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "e1c211f3e56fb1988b80143bf14f27b22e7531ff4a2df6c43a02e8e95ea01d4c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 32, 0, 3, 64, 0, 1, true, false, true, "08256dd03cc1a2d4687eacf16e01a30bf0531ae8728e906bcee55d008943b1de"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "00b8d117da7d7d3579fc8b7f0c36328c6c45964741bc20a5c39f238099af3b2b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "fe72775b5ad07c420aba3ad262e2bd514eacd6daefcff5f06ea82c521259b7c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "b8ef98b84df1e7fc84e5a1aeae3a41312a66549eb51dcfae040e899caa35d858"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "d67209050e31f9ccbb61340bec71a7324d6ab38aa5c0470b1d401c803d20afde"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7cda06b2461aa3ec7b0dde77b21f9267d018e5b4c837a4e4687904ec06cee561"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "8ddc4d8014595d95f3d966a9007b2ebf606bb21f0a236a68aeb4a113d40313f5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "dc123b778b875a790709bd3d7ed96119c30b6a90eb89b5d226f9332e8cf9f84f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 226544, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, "3f6685a33c01b6a3b7abdf2827f90f1cff6acaefb4fbcd9aa7ed073da00a0ed4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "b399eafe08c61c8e813ef05885bb272e2e83c4eec5fcf258ec489a736f34a982"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, "dc6a40806e1fca803f0f03347c6ea0dd441d00da3104cb503d4fd9972b2690fe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "fa7b873e96472060f3757e7b13b2ac089e6201acdf366567c32055858bd57de2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "3658214951f85b661953c248c8a43275faf146273b364cd6a04ec5f3e47c81e8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "bbe13a2466687c89fe7c2a83d854c91d2d5ba95eca55e34dbae5e1c73808dd4f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "afc9c2a98efa915f9d43233a4afd428c151ac252c58ae1c0fd4652c4a78447a0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "5cdf4e2319253e1873bd56a34ae33d5aadcb28838526c768832dc1822b7b18e6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196304, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "fc0ae25e7824cab79b32a5e2afb9db901a14f99b26bf44fd8ad73a20567b2f3f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208592, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "68769f2d0de2a6e87277091e2e728a07ba0a5a49a5a48cb6748cd26399633b79"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226472, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, "c3f0786ca9a931e8b343c0f546c08f95c9e9a4d0bbe891a3c85cebc032dc4a0a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "07817bdca855b8250c38d3daa6fb5f1a8de5e4493e88b55cd05d5072176acb76"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "092e8246fdf051cabc0c7cc4740e561a62e868d5d6b65c6835c166ee96a9e325"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196816, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "0bb5dc0f164049211d288076136a4adf48a1842c3daacadc101dc0db3fab879a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "c13d3d0f8a85b84a575fb141292a6c81a272421ffabce30f10687df6381dd2d0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "48e28a30b49e91d5042bedf1bf83d16cd5d71bd02267c64029a23ea5a8cea2e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 64, 0, 3, 64, 0, 1, true, false, true, "64b12eb1210076afdec3d9c6bbf00cd15e1f0d7232558009cda353777e1ab441"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "cdfb953783a050fd8898710c65e1e1a4d4a3ebf6566e54829b353d5460971dc3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "089942c946dada0a2eb2304ef30095ebf3705dfcb939bb8edb750abf2f4391a0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "28a2b4aaf2986cdce244ee99bef5d3ae28f6232e913150ab8aecb57bd4dd82dd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "abab197c8c83f29941b241be5eb01773251c59a5f48ccf17dc2a8808bd87d136"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "7fad5567f3ac1d4dd032fe0b7cf88a7005ba430e33b69cad0ba51133a4f9e1cb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "4558194946cfa222a33c74e34f379b40f2e09ff601d8c83b76c0c4a2d71056f8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "1f663560fffef35ea8fa983eb6972633e09e41dad825e8cabbbd4c737e6fe566"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 226544, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, "af6289067b8432dd18ea4d3a22022be6987bc7d87d4822fe335493d8b84fb7b0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "1cd684ac941cb89c47d267455112fd50ace95f2ca2b078fab7fa6bbaa3ddd713"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 226448, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, "f11dcc81696666f09f9cab1451a977082b4bd78f5e312542b95ecce037fecc0e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "8334089322291ea3c271060653347cb7345c7028f402f2feb8df144adacd85e1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "a562352f7d99e7516023f2845ac05b9a351810a9532ecf7c120d3bd840606549"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "91be64c7da46a17f29ec10464965e66981c46125b70edeba924bcdd7a7dcaf80"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "9bb6884148a480bb9a2b953e8c560794d2b7fe478d1f87b5728e195c6b05aa6c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "5eab749edc09252c344afb3c9173a9128f24b74eb4c151e3b93d4dcdc592aa8c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196048, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "4996738a706518fdad5278c7c7f7858380f28b5c2a26abf513ffa50c0ca37c6e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208336, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "a0bf929bcfa0b5e0b57500e14099c48aa32b027a7b9a71cb0aaef24ff847282f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "3102551455f9413fe0af32264faf8c8e3b583f1f129b25e718de71d28c1ad478"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182224, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "925df3990995a4c6178e64390a4c328626aad40d32d9550e53739c1f367aabd3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196560, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "21f85c48fcefb1390f304b28bfd986027f48bb35864df570fbe7f005b1a292d6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "90aa5436ed19c37dfcdf1be11f1919c8b511f2a98dbea9af833ac2c601ec0d9b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "9efd6320b2814eab4a58eff30c5474e46e3cfceab3c711c4852217069e97be7a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "bb4325d63ffd0a8eda0a7c410591fbfc05cde1e92354e7f2512557776cb387ab"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "1f050d40ce7ef903e4e2f9867bbe312ef42fb667c39526146ed4d23ec1ec7505"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "bc0beca12d43739ad7fbac4b822450f5dea1fe21680150032f428a4f054df00c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "08e24ae9494acdd1494f696b725044e827627b9efd1b1685af70c0775a930b4d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7eb3444e31ef143746f93ea6eeb9086c99a8398bb8278e9210ee53187d53c787"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "16f39b34f1cebf4aa66b326f3410d080c5acfc144343bf1d6809fe9396a86f7a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "459e2b4fcd87d6664406dd5518e4c5345ac1447ab666f49929cbcdef04bee3bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "c569c74fecfc41b4d17a9a7f553e8225408e0a8759e23d0f4661d40ea3891539"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "438f5e02445601b96c397b76349c1b199cf9057bbeb80f1637c80654f7f67759"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "a500eb826aa86ead556cc91c084d86f8a3dc995d270a6c07d92d32afae4f0b94"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "d7038933ade0c1631d2beb8b909c0efb7cc9c2fbcd42e398fdec2808e373d941"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "d821ee62330917ee8f8d40eadce24c256c5a7c2af8444ae7359e0ec428dd1132"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "8da76df7bd86b5e38f94813b59c4e9d8176efd58481ea7de75d8b49bc1c38204"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 196048, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "d8ee3ccb51362b8cf79b757c2617ac2bb50fc10996d9d0d75001e78c7c555237"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 208336, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "a3a3dbe0afbc39d4993c557ba429552d7cf0b5718375b3bfc250298dbd11628f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 208936, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "3c7908c4465848e4eb167538ef70d6c3f25f9d00fdeea73260dabc85d62718e9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 182224, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "31533286934a70e133220c052a4a9daf8a2d03f12c15df0c5b3900b4872eb972"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 196560, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "925202625cf42c7cb3eb06717bda4390a045cf63856d50a193e4550d676d4336"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "2aff5aad4782a2237bd15dd69b876970409b2272b80c251dc8a307521483eda7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "385120e08fd0f1c0dca581212be4a877dd2f6c212b40e37ff91138d72cce586c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 208912, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "2ebb387fb671b13a36d262cf8d219e0da18dc99728e8528e84928ab70c930b12"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "d76858599daace45027e3c72586bd82102e150659d9b2a12e2cc2494fb971e00"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "74aecf085abb21c4259e51c7a474d4a42fe4687addf2ba5f5bf04c72d912f361"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 185584, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "4f42b7b035f862dfe600c1de41028549d41b5870e4c175b201e4729de4b4ca46"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "57cf6f1229567fd23ebb58e66d4a3b0de976b58d3adc2b186a788ef9f4e006c1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 197872, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "a04e40d243dce17cbfb983edc2b54e95914925d1f3e33419fa76d3424045a14f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 175248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "3957780ff130c75aeee70f6fadee8556a352801a1c8268c46d87ba586f732efb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 208992, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "b0091d0b6dac6812f579655e80986f8b5059f31b1c613b753e133bcc5c587f03"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 208896, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "0b8da26a214a543f0b3291527079645d2c012aab4809303ad4aba9a872761b55"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "3195d9b6329c68e003d9689a8d13e4f1141a8c075559ea19ef8355cc5111a247"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 149136, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "8402a6373ab8077a9611c905ac54b962316bef3a49f9c7c012769d95a930920d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 174832, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "d015e888fc5f0d469273efd451dacc50ff2d04c1565f05b6f3bff37e05548ff8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 163472, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "a7f4082b5e3c4ec9c10197f4637c221595ffe9a63b73c05130c19a4b214d458d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "123ddd4c974dbec4b5f0e6cdbf2fd3c10dd28944ef7bec68993f4650eeaafe87"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "9037c1e3928201cc5721541dad3dbbd296685c6360e70cb529356176e70c6ab3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "c4a1a175128901e63692c012c64825ec4ea79a9559390312522999fce323d5ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "1fc3bc3ab739cb5eade4b45548646e920d038474c2c852c103fb0ff6579aa791"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "5b86f665a823ccf4eb67c0759e6a8de3ccc8326e919aa4138debc061d144adbe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "ef9cf674fe9b32548f5669a44566e8f5ad8709495020b470f99579568b42283d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "48602a53fd19328b059a994addcacef0852ea080e71038ce21fa2f72e2a35925"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "f487b691656dcc2c75382ef5cebfd0388c10fe305a3fbb3cb205c0940cc80809"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "41fce03165fa21cce1b977325864abcc30587633715c4fc3adecb11cb1e2ff11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "0a1ba7983cd27b15ac4745b6bb7647264a43acfb49d1438e61e11b993b8992ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "2b0de850571886dc9ff8bc3d0506f4144f941cd902fe2a0578d6746a81df87b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "2fe058bcdeeab183443873ac7d35e18cbacc4e5a115da76a338102854271b74e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "4deb1303f6bbe7168c317a967eb9e621c5a62a8f9f964e3d508c38774cbc2928"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "62e4284ea770ccbc049168132a83864a514e9ff79e260e0cc926e55187bdbeb2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "297b5aa5688656152731ac55ecfae3d19d3b443955bca348a6f6f58245cb7845"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "b9f5528524847f4963223bcc671da67c89a04eadd44a1afe11cde637d5afcd8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "e433a4d7f755feb60cfad8eec637ba3978c114cb846143796505335f6d70fe02"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "6a50dded94ff97cf79ea516e774cb22ca8066597e9eb791740ebaa617f58e7db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "49391c7fc3fef89fc0fd0a7c4a6488326b23739c3200863d7d132624ed6b09d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "7eb08a2e6a08c54cd8fbfc98729b80eec69c2a53b6f607385226bc6274038db1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "bce69cc3d927dcee848ba68ba505faa0536031893b39aab2d661dba1b547b905"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "7daebc5f7bcdd9ca1eed1585531aae713018e33cca30dcb9d278f1249bd58551"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "69ce0620635c62eaab7ccdd1195adf8cd74a7a315394377866b788dd44fa6ac6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "48391db7c70928a8a66688ce943bbb09913a40c5f8463b67fe0f2cabdc72bcb0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "efdfd7121443782d0eee0de2986166284a8032234f117cbd4723adc9213e0ba1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "10624439d3574e3ab7d4269f57424d5b623925cfbed337f12279ad17b232c6bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "e7677bbd212774665ae4f16c8c9e165ced8769b1dc211ec70d58b67b431e6649"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e84a897856e9875b664da93a5c6d43e1325a8a866e3015dd147c7906bfc2d227"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e435cb50a4438d0a457a001b7c4e1188af8a938b001b3399caa47e41ad1fae5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "74a47bbbcf72ccb2f37f89be49ef8dd8bb86bf65ed6d8975da34d131fa8ac8ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "21b55e4042c15fb95385b442c1c4c09ba749cde6e2a61752fc6fa03b610f1914"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "0f2aa0ca42a39ec489000baafd77d463f9a8931b808963c83243fc166cf01181"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "c36b5924c4b72d401346b070d80b9132daec61141bcce41e484ae6435ff1c3e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "cae7fc526c49cf43cf658a895f9df9220da6b7e5c0f6405daec068b099a88610"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "0fce45e21c2d6c3ed4ba97c50e5b4ca0a9559872ce6cc5ed356cdc64b30092fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "5bf206a4b38d4542b534240a84206f3adbea5dd57c4736902ae000b5e978f048"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "c6b43729262186739dd858bf383c586028c12c410428cb7bfad28cfb5f389bbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "2abd0a26e7048868477956d58e023979b7ed8919681a1873705ed502ec390ae4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "e6b7717f012b309bbfd423489b1247512620e2ecf1b96f72536c4049c4a36afd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "16dc52ebca8584eb198f4f423dc98e757906fac0e0effb9dfe67fab7024a669b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "33cbc75d9e6d1f12019f951b3e28a2eedd31ba64d2e462c08af94cf1bca2ceff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "f7ee402fd4835940d96444f8591bed5f5bdfce3e0f6f5f017db9733faa342337"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "c4f704d80bc2e63f6de6ba64792620e4b585bf476322a317e023b2411c378f97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "95c8fc3d0957654661284c851c1c67cd612c23c69f8934331a26776c793520e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "75cf563aae4fbda897dba150426be896e998ee4e72810759789491f7a9b4780d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "3a8d7cec846cdcb1e2887af158a38cd42e1fef697ba6f9161553449cbe1ccad4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "a213e19d9c7b0454dd7d255682320752a8c731861fa671795c911e41e4c2ed67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "f0e3c6f881eedf2827c1a58478afbf668c74751f48c442966498e16f513a3449"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "4f819419e7447d413cde3af93fc7b9df67484fb91e4636d3e33a93ce622cf15a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "3f17d4f3347d16b9e75d8415143f99b575c026f89e674bb5873ab00f44868a0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "82a7dcaf2e9e2f1a17cd0683a14d6287e9d5b45b5dc08c6d8b9f1817f6f80bec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "7756e6dc0939cb9c86cbb363d5827884e1e0599e2033790d4775ff2c33a4e555"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "db9bb60b2017a0d99043204c1ad060683b47a3a3dd17fc63a4df1bb0622c3cac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "427c65d0dd8a126d66ab4203baba6377ec58542ed02b36ce3ccf59a7b6c65a59"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "56e1f5ac50836a46438dd66bc14ca56ab7c1f6c789cfb091d9c703e1783873c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "934c2598a2c40aed4c4ce139aa76bf9374b6f31e6c507adf6b2e5799f929a004"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "4b195c345c01fb5f9477ada5915b2421c1a577603ef7ee651141485fb30fed82"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "dce523dd7f85fc37b72b7bfe7d3ba5b4559a1b094f7c50f5cd13d64dd29ce661"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4752277b5f87f7007725347266fda186206ca17d62903307f49625b15779bfb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "7a8b28c90e7ac5f5d4df71a56ec0b4aa119e01836205a99c9ea23ae4a49819f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "b1b2cb28192c745d9d18d69d8162754326107bdcd40b58507e257189b233acbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "20117ab0e46712fe18b8574d21c81060a7476f9730c4ae24fb4b04bced2c17da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "d6e87c6c79c70830410b9f2d44707a78c25e2102ae6dd20d1491f854bd4cdc42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "f34388e45f957d729832a1a783a511d384b2aa2ef32e7b6f1f5e45b28900d389"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "592bb2fde2b9cd83985eb5c7dacdc8fa5ec054b3f44e34d8e6649d8c4c23e9e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "efde07e304698fbdc6c90e9bcb377a52dcd7155f97525d09ef42b3c83a5e48c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "3c51c3730ce60ccd8b86537b64f7c3ceba632b5b162c512e9d74de6ef9464449"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "413338100ca5f6e058d03a173971afb72963dca54b4c3cffdc7e2f446890e660"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "f0bd99409abb1f479cd8af9b5e95c61b2ee3dba034513339eae7a77ed421d1cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "92615e3be0fed19028e549f492f4a77d1ab241684c1294657606d5dd90fba369"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "cb7314646559bee38105fe7abc53493000cf34a9fd4a05dbb58d0527d8ec168b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "3b2fe7d9e0a7b07c840a1bbb4c1d0062fb82e70d1f6ab7ab8ad3de20c42ae5b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "54706e07821a451d34d0b0b5db026651a83bfab218310c02960b79c33df0e1ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "4e937bcbcdefecfba0bb072493fca10e2c5e67fc5dc74506ff59176f2ab04c9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "d0d57a683c10175e5665a2da8197785db404edd07a42f8653a68320597add01b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "2d20fd59f70a1ad5c8be2bc760c5010e805693945cb10a814a875cf28a37af3d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "cf598e450572475412dbdf9faea40325e77dee75c3e9b4319c6d11e561c8491d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "9a6319fafb4a51256fd57110f01192c7d84b41e76c6b9f394475a301e4b31d4f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "f4ab284e6ca4eee99bd2af6fe5f19b4aba97011f31b46a1fb82f7e76f04fd0d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "c1cd985afe2bf088779c8de67bffc17e91462a5f9168152d94406f7a6f0f3f4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "96280c26baa505e9e1f4965988ea6de964ea2b67eb580990e877a11aa23ddbec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "d6582621be34357f13930f7c5386d05b38ca5345bc4541a7c6581d213b93b0f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "b22b3cbb5371bb1e36181bf38b17058fba23e0eba3bf745f2f5338bc60f0ef98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "f6c32a324941f1bf05624a8235b7868ea37ef08a10d1574df6c3e7b52307a0a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "a8d1e7dd03dbdcc9c7db6cf03ca698f9f2b919e0869319d19638601694d147b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "e2d743aae60e7b68ed3a68807777ac84d842c4d712756761795b205e4fa2e603"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "4eff91845d33f9ddb3b61207934485e9942f1f1e7ce71f3e909025f86c14e383"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "43ee15bc9a2e814182b825765fffefcbc2f3c85237dda25f948fb4dec03da770"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "56d6c742f2d839f4bb36d1bcb33f94c2ccb94bda27c04810db10674f53e7e07e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "eec474b2f06a54d7ccf2451099a5242588ec80da50d64824fef5d0e0d6e253b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "900fe35ca9ba003f98f8b763ca0482736a9c5042bc3839c8171c4327f4a2b339"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "fc70a1b191d4f8a9ac347625ed765f614bbdb99d1d0ffea27627483218be336e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "882a4e4786ec4401bac307af5c7159255b3d63f6a4d76dada734e6f13f82e7a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "38c094f9892976ea8e89de3a679e569ad111de9f647e82b037ce2930e36efeb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "7f52897fb0ef9cbfa4fefbe59ed60dc36bed97de265d39bfbbd52cda655a19bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "6d7187389d0af95c7a10a7ed8b6d312baf57744694ab3b74b04a89da9040cebc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "f7420fecdb0f290d51179399083381112f22bf59cd9cc3893dd685d85707a797"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "a46bcfbd3615a1cdb783c6583c7df7be0b9c61f2b0329e4d2350c7495624255a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "830316e5e8d03f134ee3dd9f3ad1ce4ed22aea7a56b9f202ff8b075ed23d56ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "665fbe0a578d825303ba4a5c33108e68ceca55fb204220464d46dc6100f7a4be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "798260fe96a5a2099bbbae47dcc421fbeb183f6bda279fe1a688c46b417763cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "911770b808b5176ef12b9c9c5fa0665977d0e0786e349d6a7d4bc9fee7d2948b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "9b667ce5698626c2f7e30f281373a7eaad6ae7bd10baea6888486898cd144429"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "bb9141e3e0229ad24d0eb4c5f2621d5eb41c9e8b1e677c45b8f0c85fb027dcf0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "ee17ed8b519bfba15945f6a59a2f71011d86773e09958bf625ccc810d22b4e12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "c6372f980eb425c77641df17d97127084a7ee42c07105ef76ee6cd405bc3a290"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "c72d4ef6933e0109c2c9b8dc11cf44256725d831d3e4233a0d45104d5fdff157"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "a7f5ca176318377b1d7800ea1dcbcc64c38ef19ae8782e3f4ae53a594c22e6e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "7a9dd9d8f52aebd3d439a2326faf344aa6dd9850d444fb0b3fb509b61fd1880d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "e3c9fa2d6ba60869f213380df7281fed232d82c06b32d435acfb338b3da0aa42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "2dcf38a4c3319543e24b55a5d8dbc3bd6b20deef97940c42694a8520fa7fde09"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "9626589535fbb59779e4a3a8a8fcb7fcbf424c90cfa0e46788f9f6de78c95064"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "3d3e8512cf7bce0e592b498e61cd0c028c0362916273115d2f0e16e432abbd76"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "3ea23164e92fae09e03b39afef7b06dd48df8677f3302acf01fc5fd0a6ff4b17"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f2cf83353457feff33da7ee77f978fb749c4070227c98b2394c3ec67d07379e2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "6b61602777cdd434eeb207051e7f2ef9f0a6a342dfb1dcec8a4415276583cc40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "832b68978f3cb6347b5755c3b584f046803d1d460ab2c31b2675ac0b07b17194"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "9510d8c98808efac7ccec0dc7ea3325ea1f941a1428ca175c1b88f2714439118"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "a52bea5577f6151d233242c6c80331eed9445b450dca9add1a58b74abd98502e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "3b1195e71e4536415fcfcee89c7a8d1482ee4ddb5503a068bea6eed93ef8389f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "8ef989992c37cd0e965c27d142d5d7cf7eb31a1f69bb1ebbb0658a7dd401417e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "755899a4dbdbcb955e1cab3d90b93814f3e00f6ef4547dc026804734d03bbaad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "e48590bfe465a29b1ebc50bcaba7f4aafd407e63f8de6cc931ded12bfc692bcb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "10b7c3b2f0ee0041f5d68eed83346f13ab4c453951d610f47a8e8170b44a9250"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "b7037f77a552eb04f8d7486f3d67dba3a6243fde2854ea4ad75ec133751f2fb7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "513e3d89b3d44eb2be056196535b0e4e0bec4c11a1c0f4892ac1f0c5ae86da16"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "2565fe95c29b577a6c694d2bafce621bc1fdd31b6f5efd4d5523c290b3abda3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "1e76f38ddde46f707836c57c79ce1826d9870a8346acd1d6e956c29b77816218"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "44794a2cf7ffb09f7ea35d7f079d86d58562518cd1cea45fa65044507f15db8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "94a6acb61e3df18489851e564db8759c525f4be2aa948d0583680658a73d366d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "aa08030b1009e4f6d0ca664bd06d82d9bf6065def64cc1aafacd2dd7a3307472"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "43c9106313269e97081e2821d9ca6fd6c068142be3a6689f297738dbf394ee5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "64e83cfdd4d1b103bafebbc079ab09c58d56dd9bc12a71285271ab77f7670b1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "1e3a07a99b85bc6217d26056aa40f81539f0d828a1df46eb165350425e2096db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "d9e2686543ffecd6779244100812b5b37ae3dd3bbc39e6cb5537fac8676b787c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "a4ff27bc502fb14e45fb5e7e5716bc4f1726a213efc2004707ab9d99250f81ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "c7fa8e9b075a4d9a63242aebdd39a4b146a1d7c3edf5edf8d783765f3edcae43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "a4d8c36ca9f48a0ac6e254eafd322179dfe1da86756e5d50b70f8abb885c4ff1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "2da963a3ffe6b9dd02870ab65d90e06d2845a6e34e8dded9ee818a6813bd2c6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "35fd9957071b2b99269424fe31abbbe2caf41465ecd2662b0ed2f457032ffdb8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "66171be753d7ba0dca2c21caa621a9a2b66b6ed2a68d74c03e54baa9483789af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "7562ee24debe5aa8a2305f7230e89ee0b0f86b945ead013ffe1521dcb4d95a11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "1068476cae4f0a6d84a6b3606f228b82d39c826f539ec9d30bf0e3ad78f00b01"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "86c9a2f83c9f943f93233c071c80a33467942e43c56a8c1627a86b5785d39e02"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "44afd804c1141fc1faa973852124cc5c0fdd7e31334781813c4b5f3a8d40b7f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "55eb7d5822ed684c84a8e06a5806438c9ff5c986997729a516814ea3465906fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "9ffc5ed579f2fa3f8ba2136e2c302a4a1b8d55bf8bb4b52d90e67a5294f740eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "e87b183d10cdae8c388e09c51bf60d8144d4584a1d13ccd2919504164463fa49"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "b12aeaa3c26696ceb76b63ef0f1519daea64bbb2d4ea1f65d391747fa8ed469a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "c3bcf7c5ecb3022538c25566ed410f8038492d74338dbfc143ad1bee758b2f5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "cde99173d4027c6400b4c873f605f30030be1fc296978e36def1b7ae2fb9892d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "57fc44819ed51bd51d080e59b0b434e3298795e02634098a242558b37fe34739"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "d333b6fce0ff13f554fb157e63d62e01093e26c592bb00d45f99cebd90cd1e61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "7ae612007bb504ff61fc39ad4a3a77f441bd31441799418ec4dea5f634ca6f6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "db87be3eab3ceec22a22206e2a3226da9433d2b7e6e7f6ab97d1f7ea2385704a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "13efa6ada15015f993b30456ec13838220b22d3aba7edb7e51a60de1c0244659"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "9a7497e7dafcbcaa90c93793b4263313f491040ec69ff54441e9396ce0a15c9b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "7a7b61442e6152037b872a4caa5f6d8b223281fdccd9c19eab6a18760f074e9c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "b391603737b66a73bd0f3ba830d8d22dae7c7d3a09e45567e4c54d737a263086"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "62f8a7b4311728eaa9f632d6089d94ca7fca65d2761993c54fb639b4f021ee29"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "1c29bfd72c4f245a0d038bd1c8fffb666a6fb7872743683598538a63e4e939a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "1269592224044580a2e2266c56f7b0d590a945e5781a54a3b4bc8a41ced4580b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "832b1a8c557fd5abcaf057b5ff0a319c91851660a4dc0cd0512d02e30a4fb57a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "051a67d5a0f2039ba2c6204cd91a0d67e8165e3224596e74b7220dadea6f0aa7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "7a81ff7b9137bf8e08338af3879fea42a9b92e17bc85ff9f4538a93f27703593"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "6d62502a2c01988aa3e26359169d8ee9da69ec99c5727b5aa5790e793137dee5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 118000, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "b33aef8f847d1b0880bf55be721e42c756eacfcd4e68a6dbf1243169b7ae2ed2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 117904, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "5be36f5177d97db4b2af6e9895164b3e19de0317b7f2c4170c9bbfb3c3703c47"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 118000, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "ced89432668e54f754fa447d12cf6ab5b92958fd51215c18da6fb2131bb5f17a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 117904, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "6e8940a09ee026fc5516962b085e4b29a949a7bf2965510c40416ded290903af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "6d5ca8f98cede08c6408d8c4dc61a79cf52dd592d413af3976ca2438864d7709"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 118800, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "decdaecee7bc4181a6e0e04a7b9da85c0e81c7b41744cb93cf18c10e1f5675fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "a773264f5bdd55d145d3fc0ddd486c1192fb7eccd0c046385cc37e7ff599ceda"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 118800, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "c62e9126dc9934afb80a65674854721c33d0893535da7f9f2c70d667de242aea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "c9ad00b0b58925067a631c11af4b60ee15508115fc78f20bc3662c1d68257842"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 118800, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "0f86cd9b64df5658115bf968ae1e321a62f97dd8888497efaf51c401ae7f96a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 118896, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "7b1b2924e9d0ba1a4450b2f69acde4ada26ecd2075e7004811903c7f68876680"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 118800, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "bcd541c989271f5fb8f41a4f8f11b882a66ee54743b8adb3e91bb26b5a65a160"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 118000, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, "b0a8d8464cde1ec6f82edf12e04ee70c01ebc1bfa3c78b2fcde39c5941a65de3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 117904, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, "0f54bf08de5d819ef8af04b62f0f213a7e314f9c22a76a8c38bcbd717b009221"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 118000, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, "058ca1a0c9fa250a747743c41cfcb01fd6358e13b33752931c5aff47718b4b37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 117904, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, "c0b8285d78ca12d8875af49855503a62c025b4c7398abb0c0b6e3879c4f4a6f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200912, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "7a0262bc4c11f17597d8a8b653eb3f987715b8af6d4a1044ce7728a29abed947"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200912, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "9a493502ccca233562bf872861c5478ba8a5cdb683c7b869ad32315e13123fd6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "104b8e2a41f39bce78c9f1e8734aac7c2ca3690370cb24fac38bceb269eef859"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 193232, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "7e7ac1031ae47b3100cc381d85c4a02ffa15518c198bf10317f562408967439c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193232, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "203a58e9bd14b44b6361a81b920c160f7b4138c978a693c4ad96860f8c5676b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "fee820f192d00d5b9cfdbbae1261bbebcfcf2b88709a44f67d5612f5d9752c8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "0fc34d7a886732c83bb59ebe609b593855293e04fc978b89f2e340c367e10f2a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "3a49518c7ee59e06db79710b52df3d85e6fa21c4ebb2d1a812ea7cc507542d32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "74096beccaf63910b802ec964d60e1929e99e2becd44195c07f5ced447e49aed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "c39139497c0c68358d9fa539d9e110cb346d351071527b15510814089e203200"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "0a0d6508e306c9bd06c821e9b00b879285c335318e28a0bb5c03468a9e60edbb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "038b11b134548da1164676b4012539a5040b979e9824ac64c08b27afc4bce85a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "357968ce0b0120e2504c1e1fcc79a24d6a374bc32266940bf02a467fb38d48ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "65e0bf0241bce69a038d4d77a9ffd1c0e5e940608e84401a7866df659e3cb860"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "8634c4f34446c0d30a4ec26620ac7db33579b9b71add12764e4aa21e8f98ec6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "a311f4df15a4215841eaa682c22087f46d3237d009fe16274284a24b91347c07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "68e351a1fe4ed0175f6b4f7a8268bb1102559b20e07a82760b7eaa191719aff2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "cc57bdc297cad9d93740ab40e61c43b98aa51e26f2fa4b971a0151865b7dfd1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "0d59e0566d15d013e4cff0ec0265ae790bea2a3f0dea9765b3d420efc5364b7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "486f7906c99dcc04243bbb86de2377d91c678619d57723a7383d76024f050215"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200912, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "5578246ba3cc7caf4016c88184b0d10672c620cb3a083267064a80739b888cf6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200912, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "2c12080405a4398429f391df57a5a147bf94d822ad0f64567a6799f18d8af00e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "bf9ac35558c8e1d6874db03896d4c941b175065b0251e37131dad0fec9ea9f6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 193232, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "a97f091e4ba3e255ee7e579fe8bafaa6315dc49eb7bd13ec8304f5fb086501b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193232, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "d082ce4007c7f743dd0e126f62eaa62a38631bac1dfd0a3ca3f12c002013f9a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "2f7ed78a9f8f9cba14c9e462b2458461e73db4e097afc5acce714f3ad9a77d7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "fa4e28418470ef79195ebd4d813028652c6d9c2201676f92e255c56d902296da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "ecda52a3b47490d70916e1a819299a7b142a1e42b1f045373cbc196384f9ce6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "e48559d71987249eb2da6191463df49e0eeaff7b00ca9e4a03782d55037cffd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "c753d6dd2e35c35febb8bc245c7ca427605712ac1b03e518f99f4dcb860da6cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "37234aebfbc47db7afebce651113507f0d6c894872df00b8c8f5508223c90c6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "b7e4292199f70e5fceb044c88fdadc8e85834b4edf07e222b15bb1281f302024"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "51137b75ab98c8f33a28e7b78d74b77a5cd3776c513e5883d5be5a2946e6e2eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "ba56893bfbbd58abe21b2199805739c1a5f9d13cb9f5cf99e95b7bc5febb8559"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "43deb7e2214ecdc2b21721a21168dab8fcdcdf78aee8ab5704e07246ec38d707"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "2202a5b287a8d931d407a715616a37a3bd9bf7fd30746674740eaab659f3b00f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "f59681f824d89f1de8620a051cdd05953ea7e1e6b5a836adc3c5e3ea6fc8d0fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "2e3aaffe7e71479c19af8f5ad9170bed64dd7feba58165da47d856182be0cdde"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "052c883094eca58d2557764926267907ea38d6b7ce450af626971db0ff1b0d30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "c5544cd0653d84e905a54dd340029fb122ca52fec3e8b463d6fbf47b19c6830c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200400, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "6a2ad8fa3f73a809ee6be96abc72c9c40e6f97da2cbc0b51de2a44b61c042195"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200400, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "13829ffcbd04a680825346bbc67af51818d2cad1c5decef87cbdf87a49131f8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218280, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, "f8fa41a8ee98a261cbe571fe6d533a11142d4e17583754b406a772182e761fea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "59ce69fcef49047731a14f8b92d8175a176bafb217a412d81c9be1591d380510"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "4d8ab503829791fbaf9e7acf31fc3a166a538107ee2ef63c2e4ea6eec06c256d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "4aae99335f3c7c4c8972f84c8265dd7330014263ec15f1119a937df54a8ed6c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "fbf1d26d61e9b570486a65dd57fd6556b7036494c28a0c7599f1aa6fff42f478"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "6b61460079530514514ce884e3713945deb1955c6ddd57129ab4fdf343b2830a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 32, 0, 3, 64, 0, 1, true, false, true, "23082fc9f5202d6fa9456230c33ccd3540990b5d43614b140010eebcccaefa80"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "05115a3fd83cc5b06774a9b7ed6513245c173508a40d1332752be9868c251b27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "df9766fae1aa947b6f9e7934a4018b24a62f0d1d89c1cb22f43631f1376997bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "e5aabd54e4710d9c08f317eda61626cf3c7165b7f2671c071517bfce5c29f7a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "86e93151fee17ec8f5ff366105eedaaf95b46782e9db467beeef7cfde9f17207"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "89420f6920ba41db0f4ae6198ab00a9b0984178eebfb411fca84dce034ac93e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "82acae466d005b29a102a342efcc1ff5b978b10d411eebe8813ce389278edcb3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "3f1b82f7292e15de65fdf7207ba3091972f9582463979d32f24cc9c8d8d46ba3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 218352, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, "6781a4a4769a3e28cac387ea747f3017e889336f3696e7af95395d1e2269c7ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "c3e9b9309007804ea36dbccb594271495aa598b1b1f714f6842c70315fa5ae08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, "a622ee1bd6e2100f2398da8da2fc78db880c30dd400beb92d8192e9933e44a86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "622a470ce29644ebc98f5cf1721f34e617b16f1fdedd79669fd1a92c0d707a11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "30dcddc1844ee554ea25d8ac178fe545b4409a566f21f20b108b507edaae6424"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "7e1a1e31d51c0339045618d99cfca4f62999626146fee22964d93076fd191290"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "6a6b2d383d6444ff229e264d116368eab6426ae0423a740bb11c41c4617c896e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "80599ea09ae07946fe9c7e85923b85748eb8a4586cf0b51fd4fa7cd3d219c43f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200400, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e1eb9234c4120647145aef28e2d4cd027519fc670950cf4778dd0ce2f907fefc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200400, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "60faba2a23376a2b072252115aaa1902dfeb07d5a488076f6d07177ba179d069"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218280, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, "1b2655368267280e1d59b09d408dc2c9fdfdd88b2bfb2be7a6f0acd5885779bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "7ecfdb47e6da00f00b75e1155e760683979de1289b7cd7cf8a68104ca4c48840"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "b6a8efb2d16c4832386050b137db05427d48bb2240a06553f3de40aae1056288"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "f5e80a7e7718ea6e3a4d3baaff7459af204f79a4a751eaf9740b0c3dea60cdb3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "a804e76f5a47164402bf325a6a671ccadd7a1d3cfb54e85033560f9a7fef0b05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "ebd636be95704c541d9505a9cc64b955d5853ed0aa01637f6780a2df182a2eeb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 64, 0, 3, 64, 0, 1, true, false, true, "514ddcf3664a3ab83dba46c16eefb0de33fddb0e0486321c67f76380ae59f7fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "6954d3b13307dfadb4af0b795f2479ed4a41bb09c73db28e3fb27e50524e680f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "a0ba8774c828dc06f904f54ad3fda368d41f117e62a24805a67dfaa32612bab0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "d8ca3a8bb8fd4914f836d94a9b5c829c8683487b6ec50940fb4f092cdbe07c6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "84bab81a7617a36e8026301c377f3bf63155372f9968923c8d407f0fb9bb63e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "cedc0fc959c01a61a077642946f0672162ade3cac864dcefd8c8f0450bf57cc9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ab951da6cd8de5cbbf841134d39cdc6694330f7aedc6a76706297ffb4d842c3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "82ced1aaf694f0da60b260225dd557ad273f6e86f0b61a0f581ee603cbdcec8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 218352, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, "d1e18ba713993d932183c4f20f749178e09ef579986291b8a80a8ffffb7a040f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "eb43128db78106ea932730d53264017837ec72ea590aa0c2c08495ba2a0afc62"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 218256, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, "9fe3173657db6f3091045155964769ad882c116716c41f194422a51aa125bec1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "a58f07da4ce391af186df4af84667b0063506e3b7ff237b6216f82032ed9193a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "8ec9500bdea70e4b16268fa06b9259424f98a18fb91fd102fa9c75bc46e04580"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "88bd7b44e606d72f2a5a435ed3b299fcb774f43e016e313aba2fe499a8871f8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "6065748f65afcb9392da92de8ef967c644fcd6fe6017335a7c7233ebdfd95fc5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "f691f8b959541241af34c078c7fb0ea62817143d84bde9e6a7046116d4612127"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200144, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "c4320a086765a506e1193df58071f7ab5adaa0ccfec8fc30545e5e2a5f4f4243"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200144, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "fe56ead8b29090a5c5a1898bba7669b01ee1f54f2b254fb18e8f09f70514a25f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, "9ae32f54b7e1b0d7b1b899e79616314c55b7a7a8ad3a6bbd8407e2846e42ecd9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192464, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "bff8171b59c3450c05227ae26a98b960051cac2d7cda6c7b43c09c55cd2afacb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192464, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "318df7c361f1e2b6a8552462f8c8e7d923719c6a8a167b1d2c68be4b44724f4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "8fc1fc5b749a0e31fe88138c25a9f8df2f0f02b6a92b9447046f111d752762cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "c94d27aa551da985c5e3090d65c7f52272645acd9c7e3c1f29951a29446a0975"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 32, 0, 3, 64, 0, 1, true, false, false, "abcaf7f8ec5cbd40ac602b11dcc7f1823ae8ac767207ada4d5230a482ff8846a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "0c38dc9c481574025b052db2d19d4ea1b70c21fd086823e5b277e7fd8d681e6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "9ed53dbafaabb00d09a7235cfa10607281710a110989d8e4460ee040886ae51e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "c38ff618728d06a4b6075262cd3092880c0595ad0845abb80007a3a13b84eeda"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "6433bb2362ce5510a3ca776d03ef0d98e8005d514a1b291b1b3d1f10176ada7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "b73a4bf04e9bb827bd215f15055365e70dc3ff483cac8e72b6fca221bec2ef73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "459caa55706134e693e712962da6fb5370d0196dc1a9cda44b14d576a0e899a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, "32c51d349c822ff6ea294bd9ad4d67b1037ea2bbbb2db1726a6868f782d90a31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, "95a997d258098ccf454aed610b31fe67bbf559dd9ec4d8a165886712d82f30ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "45ddf53974fac6241800b16f501b8962cd8fe72b58ae15da26244f6e72e3a7f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "f57f1fdb0bfc45bdb45ad6eb69ad67bbe280d93bbb5ddb96e55aecb378c11dc7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "5544d8f94f68079171226a5db204fd3d0a92d6dab4a5e031849895284889199a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "7bf6a9ce90c029868e0bf0e347d01e1838849b560052002fd0e5b9cc70cd74b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200144, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "7c9d37710321247dea580bf3cce0a24778d2cd8cfbdc4c3d9080aba193ef054a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 200144, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "bb7d2232bd71b20c9caa89453aa3edb212b3f175a56b5d78343d5cf215662b40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 217128, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, "7443360f788c1072796a5bcf5f412bca1f21c4012778b1a03252fa1c638e037a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192464, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "71941c3f41b70814f9fb088e5e98da53c0417959401cf14c2c04e202f5f2059f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 192464, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "2fdf757c0bc089d9e77e28388b6ace1b88dfa87148f252836f2c87ef38e7cb9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "ea7f6f6c1c3b9e97899cb7853678b10e4a31ad0e28cf9df1a2a3008900df83d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "f7107a2a39a7589905c260c58a66a138dbc43c9eb0b0baf043fbf95540f06558"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ64Kv128StaticKeepsAbForGen", 217104, 384, 2, 64, 0, 3, 64, 0, 1, true, false, false, "93d722b9cf4af697ab67ff53148dea836e98ce572e8a52a2a80ba688be3409ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "4b2e2c85c92d5331d0155beae00486774fa3e125f6883fa3716e60e1b61a7188"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "a6898577411d73b063456df76d611af814daed2dd2f46d292efd9973e8c2ec73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 181488, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "2d481a2bcf1021cfefe9ab384e16ba953949b86566a76b291799b264acab1f64"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "1b8e035a8d359a327dcb04bdde57aa81e8415efe2a1971843a9a2e853bcad8ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 179440, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "e85592355fb679d0387db2ed77c804b9800aa27b6b9651704ffd99b4fa0f79ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 167056, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "5ef4eb4a1a0850d934254a2068d0617996ae25d281e70475b2c7955250b42338"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 217184, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, "28ba2543e659ae15d61807b83214bffe6857c26b777a379c425473714c7c991f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 217088, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, "0d8f965b64e9e7c1ad000fb8d6314663f803aafb57235775804eea8649368299"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "67767d79a30577756e1099bc6b77837a7ed9a363068f81c403104a02a5768249"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "df38847d7e6ffe1d2748690facfd30336833d0a073a98abff920087b6798b9c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 165616, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "35f5e5db9286ce0f940c501b23a5a9693902bddc112d36171f658e6ca4508704"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 159376, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "f5b764553dbee9eea96fe9b130e7a67f23d730f0168d4384c1f247a9927b6ad4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "c057b2ba00bd5e1f9f0b976bf03baaa5c33d01ed303a3274abd90092ecba8e1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "9cb090739ec9ae4c4523e08101c00f38ff5936b840dde3d99f1a2cef62a1a568"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "6576f5ae80900a90625faa98b2022bed9fe64d3e5fcbeb277a97f3248d4aa37b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "06bb74b15795b5f4e8e01314a8dad285f3ec87f3e195c40f65d35eadd10c2477"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "8ddde6c9372f6fefb9624c3a4ba66f38adc15a97c580cf8b52877b874392c9ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "13054212e247372a03943309490788d70f9284df760bce677f6d4eeae6080bbf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "826764bc23d339f599dde11bdeefcfba910fcfb4767672d6dca6f890c7def221"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "81391d4b2fd2d37d901e0c4b187195913a39a32f69d69859cde6fdb2d98913e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "2e72b6ac5bce291f377abf08f1692dc2aa892534cefcc309816f180a2b9f55eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "f43e214e414c68793c860c7923d209132e2cefe078e98b4d1e7de7483e3f388d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "8d6641f8058bdd0b16cac1fe51f71c9f19361893dc6f1ca55cbff6f4316d3ed5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "badea758c19208fa8c193d224792111282fff562812227e3b4a786ff8cf7e39f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "1bc9c1417a015416c9c0e02ab518716787eba3fa8497901729ebec24c0005bf3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "46a31fdf75cae3478a1039e27c0f3f1d99b3102cd785c4ad9c2a0ec2c8f41900"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "031f6189f76f1debe6ebef7bff60775809d1c43a8a25bb4089aa812159faab5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "f1f8cf7f26bce936297a346d890b8fe4a76afded14c5d5c156172f5b38ac5875"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "f477a3b90cbd6a94069379e378345955291af105ccc1f7592d908011f6c35ce2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "8453f1e3332d6fa41de818bf9dcc3f1fe950d7bf0833b0bec314d1ef50cf686c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "4ac86c5cc6323676fb2565baf0dbfc2e10fe7c5c0c44fedd49150c717fdd0102"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "6d3c70dff21b8846543bb278d0bb6dd5bc88dc407a7241e7660e73327bc10bc0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e1bdb6678d15f4d32896c84305ada4bd6ccfc7533f4345d1be5dc5b1ea406491"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "569d5528f85e436b070bc6a4136227e2257cbe4e67b7702cb90113115461713a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "2380bedbd701653bb03fd4e696d6ff3c8439a8affba3978c38bfa383864797cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "7c1dc253d962cdc7922d725294c5efc8fa286fd34019318fbaabb770b0adc7c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "7413cc39417d84b44ffdd7b800572c9aa693704a188a8d9a206d3ef48ce0471d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "685561598568998d00f168f0956ee556bc4ca1824330f95eb45e24a5b74cae1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "5b190f9be46396b5c21e1930684758567593dae4a94443c194e5124c2e33f537"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "5232814cb8939e82a9012c6f2adee47952a2a53870bfa6cb04ff84c67e160cee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "240cd53792d63ec1768d4176a4b3bdf0c7832f169e689173d3252de2b052a0df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "2dd7da8cd27193f6c30158e12fc93a97cfad72921aef24024e3947e94e1daaf1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "758b2285f6f321245ae49fedcaf252eb36e686d63eb781ea1a66ec84d19fda3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "5cb8ec8d45fd331baca55ac8244c3d464ede49a777281ce50db4f9d7bbf7e55c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "977298daf7b0b7b01a3206a18220207fa30e683c3edb463f8001662817dd0e75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "7b7d57f31301acf2e65cba413521638e084f64a79e67c7190859206ecb382985"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "809a420ea4248f1981ce8fc2799726fee54ebf97adf1f917bf7fb65133383d51"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "b91c5c99ce7f12d7db1930d16eb53c34446104f5bd498abb70f7b9974b5450be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "49dfadbfaef9899e91940b994bff2fc4dad73a7ad0b76ec341012b8a851feb22"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "4a8e84315c2c769cd2d0c7f5ece030812274f166c0b93ce7fbd0dc82a67612d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "dfa1578b5c95dd3b7fab5d2f8d2cd9bd89dbef2db521d6302f6f86636c620fe5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "5c85244fbeeb70ff24936f9fd6a7062c09ad455abc7137d62c29762fcc0133c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "ec2709d00f6d9430dbfdaf3b1b148bb6af3c5ff358f0037d1622d0a8cd1a98e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "ded6f71bf8d8668da2eed431bb374714c6a89bfc42a8c1482d5f9d57a7c0a1a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "9f9ee05fa2327ea66230e0fa150ed5b4703a317323260bcff54c0cdc49be5835"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "748c0339cc9907c9f1e659584b7cf6bce5297b4ba791cae7145cc7e3431959bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "746b2b2e76f4660e4550f58fb840f36c98d4fec6ee4078ade97aca8cf5b3f063"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "83cd79ec8f667aa475ebfa48d687fadf6816f102cde66147cc4581a5034f757b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "85bb9b130d7c26331e14296dfb26a987b3a9ecbbd7531f2cfa0b27b607dc0c4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "0cabd09aaf8cfd23fefd2a02da3a4e03b3f1feb52b5b9922d458c69f4a226f7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "a7bad81a2b4d68916bea27c84dd37020aaa8a17784d1f07fcf270b6c9e033201"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "857ec5f6abd9cac1df49ea869f9f6e84f1fceeb054d4d5b22bb2020eb1c0bc51"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "3110f6b78d760c4ba97996b3a5d839022753aff65c73738f476458063379ca27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "ad8b29e4e1c9b3859392cc23f814376838c7faa082e74bb700d66b4a314358d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "fec13606799b2d95b58edbe45bc421ffe02caa145aa73ab04f67208121f69672"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "cf04e04c683cb2814c3eedf86b15447262a0341cfdfd99a1d4d91ca1610ca5be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "d0e3c8d8c88d54ec98d4143b0761095f64e8c835fe74ea5f0c7eab6604481a0d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "b755ba5463ee23495c2340c395b928668648e2f88e24690f2ea6e69cea45b22a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "9da38ecd11c0ea50f9d6997a9cbd6d388922c420eccf31260bfe65dc5cdb46fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "0248bb8603a76bbca981699288e8bb22ae16b9c0b1adb479f9258591315bbd6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "c4640c4b41518987772d38e40e16e3a2c29e62db23a70a184877a3e6c2192532"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "afb8987edfdd8701d54d4003f4d9853bfacd0b1b0bed83f21b6001da8148c9fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "3c9c6cc91eb5b053a04e26fcdfbcc8850d26ff3fdbb493003ba3262295e6f0ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "cdd3f21e05c037904619a0015aaa1083405119f057d0f63f668588dc73bf2352"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "fe1c1653c3019dfb298a16b28aeed869ff3bb4790cdf01ae11ba0ac8aec6645c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "81561a6201d0dca91d4ddd2c211d636f04026526208f559bfb5f06dab02a807c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "4cbbbdbf1dc8d5aba809abc9d79fedb1756ba061787b3ac0f425ee83d3d3755c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "d3a659d292edc878199eb521ea660be0426b9bc3eeb0aa416014f2cc1f712186"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "0d403e8b6acdcd63c45cec8da69388541c48cc71c98037afa0eaaa1581197179"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "ed1573cb216b95fd8810c0029c6135b5a246e5fa8c6154e271693a85d6faf3f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "1d4f3b60cd1b788658a336c035f9f2003d1c7c15279559f0cb20bf2d1c260bdc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "9cb7bf1e93f8a18fa5d2df5d12ce5b56616cdd74b6c3581f8c16548b6a5a14b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e21ad29456a9a796f6b45da8de2debe034740c2d86c75ebebaea72a8741802cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "1821be8f31e856f96b28fb02e418833f3d13768b75e76860c6a67c7364b50ccf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "e07b3d1a7661ee9f4cba40977ba87bc0b7e83db6783f7d9442500e17e35f699e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "b5955887bae0b9e91116c17894cbedf9bd2455621bc487bf7052d782749185cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "3b7af15d5892a6065018adcdc8856619a1ae4a1957ca88ee05f7093e52909ca5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "86423f711afaaaf4e2eeb71a5c21381bb13a1cc238fe8a2c813004d6dded0dc9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ca0c17e07905a61947c8ca75cd04ef3eaa4ebd17a056fcebaf926590674d30a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "b1eb92d37d0b3f66f8fa8a85bbb965feadb7d1292aabd2eb5c47a91603f321bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "b6af07196971479423905f1e8b59a478a8e65c7270d5e7ff98fca958a61d8487"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "537a86e49e6bbe0b45a2479686f507468257c63940fd84d3f9f9ddcee11c706c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "f3c98ca9028fc9d9549599fe99b9688f23d3cd25b9a445521020259e08dc4c4f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "1692d455888b36ecd4a0aec608c39272a21f27b187313ef7fc22cea2cc9cb225"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "d19ac5726a8e3b53a7bd23525aa1c3922413ac52bf17aa680834badfaca4d1ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "980ce920d01c4e8f6eb81f74d194064d931a15f8e8d91591bd75548d1a537f54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "8642ec9ee06a6d05fa0fbde1e4e57aa3c02cab87b111426d2e5bb9e07556fb52"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "b8496cac58a2061dc9672063f9d237609aa3daaf69683c65a925be3d69ce2597"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "2ba0c72eab99c84395f6cc2ee10f60319148c700fe1d6365f516ffe0d143ea2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "031b08ac9850a8e925fb3b1f2237e85ccf4690f2d1d154196f7d1427465f3b11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "a684e9be4d16239e0d26d631e9c0de2a719eff3dbca117b01d10991c47fdbd2e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "05b2bed6ac35a9146dc4c6be7f16bc15c9a716dca41dc975f25c4395f5dc7670"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "29982f9ccfe8886ff392fc39ab11520bd86cb264b6d5656d3fde75abb787c4e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "20790b6fae1c7e7a2fa12fd63c27b080d5e070b1682b759e978bbf0dcb5a930c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "b252777d6810a95435539cd7e57d88f1d75b2eb92eae50384421cb0909f38288"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "809552dbeed463ac9d1def9fb5e47bc01b852fd700499afe3699194fc1fe2406"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "7b2893a6b0e60651d5977c9d09331ba4c005f4849bbb299ffc04162ef13a66aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "b911029cd78b48395b660ef11e468ec716b08e08ec13a1402cd5be1ddc0087ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "73c28864c5eb81961e9920e6214a655b623c9a81a317cc32b33d37521041b7b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "ede25e2289f54caf958dfcf7c38d990742a54c66d19edc31d51e2df9ba9773c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "badc140c63a49b34f7739ae58f5f598dc73151d8283f5adf9c3002db2002bdf2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "a3ac6605f6b35ce64578161633197eaf6538b14c9fd4d9f02ca6b0e953e1cf5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "f2c61cca569f2cb9f402a0505a4c0fa3ce95709bea87824f577398ea0a14cc75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "071613aaadbd6f779c9e5d561b710b936c7d088e3d83b3322a3ab11b032704d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "037ac9d539bfc998c91e52aef3fde2a71f9a230280f27544b665780d6fa238c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "2304c1d63229facf14439a73954f555b99f2f549ab4d28afc724509f7fb364d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "b1c5027e1ea4eafd7232a809b7cd354830988977f511e706bfbe580a2a1e49f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "5ca9ac9cc1dae46c59d41017a2cc7cc1847385fca4ec3e4d1e0ad74f51e7e059"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "a199a697278b4bac78d40fd1a49fbfce2a400d2e0dcfa16bfeb4d61730f31410"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "07dcabd626022305531ab141d94afdba4f80123ef8d82d09e69b6723bef5a73e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "ed7a71c3d511e6b42c1b66bd6a62bed6ec702411c056d532f317253af45db7e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "d9792655d849dfb11c5aa64e4d498e203340242133c5ea1e67d8e673f9a6fd32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "68e8b5162d449b0f08c92f659547d544de53bbf7e9d956e03763fca3034a8f8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "fb32822251e912fb45bca12211510d658215081682fe7bc0c4e3ccc08c3cf394"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "b721ffd6d294d0be1fa2ebdcf044965ea9a27287547e2e93d0dd357ceb9c6cd7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "d3b9892cc44f4ecc1e3d62329732c77de8fa264950cea09e15279cd9776de1b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "43072b0bf180880a5597f74418e9d5e5c17906ba9ced556944098c4c4839a0ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "04473f85ed58b8f387355adaf826e8b8f68a3541ac6a6b656d9393884a7697f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "fb240740c48e7b60dafaaae213a35b9ca090fea6d4844b2ca81431dfd57017d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "6650c4787c40fe0eeb388c1afef1490b92eb40091483cceb25da47951f62a9b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "bb88d9a3d1a4445ee94cf27de37355a95a56e1c2e0d68b7fb76f54e1a4a662b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "b8f17429b237b9625b42eadf51c8431aa6e5e1f3874909efddfc8f92f13bee33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "36d497b6d9e0721b365e106699c66fe7b9b2a86c0c2f27d51bcc2e23aad7cf67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "bb3fd154ba15fdbcc72a35055629670b8cfbc7b3a9e3d44689501a846d67fd53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "f763eecd6aafadade967234cbd1d9e36fa609bf7c50da375f7efb3ca5e3429e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "9058b6e186ed5d8739fa80cd4831c298d448afaf713232a04866531c0d4258dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "8a4a799ab93235a6ede9b5fed43b8ad51eaca20cec9b22f58f1e13bbf8c1af60"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "161634f2b53d67d1aa1b904f1f8b0c3bb5e0a19e3c83d0cb17f40f041ca1ed80"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "a80ffdc502ded7e0bec54bed74a89f37fa159977daebe051977e96e40f759388"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "f0b41fea1ab9d3dbd753dcec01b41e30fd25528da362f7947da135275a9c15b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "b72da48e33bd29f4387f84f8f14a289b43ba7350aa1235f23a3b10fc4a1f0b75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "d76a44caf9574cd5365db208090b32b6bc3fc61b1f6b40d9775045e62da08aea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "e118c95217725ed94b8dfb5eec73d10dec2f53c4e830dbb632cc651e891ade6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "3732264827aa46750a4820240ec5529e2b517012fb5b4449cd9f39e081eac89e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "3dcbdc5e79a7afb75d66f5ab203a55b10b22960dc2be0d19712a487021d19c28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "ef2f9704610aaf11837eae408b7f225e745b99b9694337b93a0fbda508622b24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "29cb11764db1f1858983adf8fc5f716ddb28c9868eee004f3303734f664fba99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "1c16e30a69144feb69b592e7d51ecd5662c5df9d2de64ffaa0f9205bf070b7d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "59e93b85a82b2cadd2a4656fb5c19cc64d9385d04730d7eeadd43c391b773599"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "684fa7faf0566a8345b4224d1de05261f4e2e9bd8cae41a0ba01f43448b0e3af"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "6cfca56886e53883adf4692f9150206423c5a5ea5d28d921cfdcc7ff11d90e3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "8391dcbc41fc62552b2d0619fbdb5eff09a75abbb5962a01209b8225068ac50e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "2572c0ba6af4ec43fd2bf53b48d579b3013932ffbac719ac58250ddee9b1bb6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "66decaab25159af57d5dcffb688d4d4e468a2e987d9d98814fb3a50321388feb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "0c40cd6709b09c33d27db861af6cffb7042d6334fa9975088eb52982d7f410d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "45b57b30aa673ded5c6f737a36b18a119c82a18eb28b403d17444364f7a58e9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "7e819bab715357d73cdc9bbcaaa1997fa9cb3630e07bbb0059bc83e5202c99c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "545f04a2947c30b3775310a1390c2d0f80dc2b16811035331b2c9fa2fb008832"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "0120e57feb4161742405f920a01c61198648bb9c38d2496cda210171611e1623"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "c59636993d2750d5f098321f69bde38da9c4be14b57affab7bb42c1dc052ab53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "5446ec52bf511569412b3ae210f1b38937455fbbbde130577ebcd405a385a99b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "b1770214ad458df390c95a769e92ba8ca3951b71552769a08d5c9153bea0ada7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "bed79d394cfbd43cab73b27a0a5440c15f9434fd0ffc551024c857ad7121e289"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "29e952ba8d007e79b9e19154db6f14ec48efcaca148ac0ab8cf46f8805bdf017"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "be4c765ede1b20f5dbd67645972f91987be1703a6b0dcc42aa3bfecae347c99c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "d0d6374bf00f657a36dbf144164b393365d9de315babc8218a1de2e8ed006d5d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "0c3259f0aa960b27b3688e6a088d2b2e643cf17fbe19bec7edfda46f580ae3a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "976a46ed4ccf302ceb9deee28adf517f3c004c61f9b992ad8e475e27862fa4da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "130a2024c99c0d06f07f9e998bb93c97e51144b5a77d1a285463c96efccd98d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "14d483d3208f6deb50ff36c7fb9b26da5109a12617ffe0a2114fc0d30b1193c1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "d433de2c08cbddc74e1ed9112f08ec3d29474818d389f7eef5a7f35e4b63c9a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "f5f70a05cfdfa4d4bcb52cab1eb02bab7b2b00ba1314f5480f993bdf5d560553"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "0c0c222ea1f0ca7fa42914a65285073a11d252832b6cf62d1add40f4e532dd85"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "b9bbe2a8826e9d1b0aa04f8e5956007845e7382da29fde3aacb549582d9f99b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "24c2bb29e48f25908b12c6d5ca00dcbefd3d7e54582a263d0ddfc43cd7461bb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "13410b70292da4ebed4165ce59cd616361dcb5c57db0aad6d8a7cfded18ae787"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "850d957173740fe12e3ed949362baa451528d8837a239c9e17c0e954aa7423a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "73dfd851fdb32742a10331063c9220b7d5314efa198f48f2f29d32a496f5673c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "289ceb3b04bfac3724215551da29a35b52a65479effe28f500ea68cfe595ae54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "74a7d4ce0a5040ea3fb69784cf4ee73209cdc669fb56d0e39b87d921805d5b1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "5f48240a21c5fa79000c8b8e5e9ee67b7610f74dcd83b954fd52853f0203894a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "875343fcc5f857944a0b6a6f99f11fc7742696c80134d95bd918a9ab2800a5b4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "3245b5643b4e0e3314a4b8b34c322d57cf23f17cf098c69df1ecc1a4bdc8d7ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "58b0ebfb9e0ace91afc08c16032fa1d9ee5b50a7a4e9b23c6feb65063ee81681"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "d68db919d358ace23696890efcd0fbe1110ecba9a8d1ca30917b017bf6db1af6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "31e486dc599f39c68da0b5b18101455ccc866b80aa878ca1f726d58d1155391c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "1e6d21a7dead64c611151b4e39109e5a869e7c06578cd72a533cd5de0c50904b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "5b7119f920f8363e9699ed77b8041d80ede5f773dfb263bd8e6be80136d70fd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "271e1d07d49570a4f6c7cc81a4f659a3d746448c96553678997c97ee87cee806"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "cc9bb77d493102c77c5ce8f32135403e7df2ffb5628f0ea9bfde0b9331672372"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "e0610987097c4545d5764dec63e31c1cd5806b591f565f58d84e453be24dde92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "6a4a67e4b03c8fb50687e63f819f8925f3f075e9e27c61e246838c7ff694c750"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "5bdfe756a2308024b75248363d3cd39a64e82078e42505cf43be8b2f6dc647a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "0a45fb54b5bf7f402ead4209a03664d80e915d797a0537aa9c9050db629622ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "b59319ef3dfd56d2d176d746ef8104e4dab561b6f2adf61024407110be49add3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "e3402479eed5056fb03b21735b85b296c7efd32006c3dd9bc8bac7428f3f93fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "5fd9bd8d58cadbd431bbbb01b43d914c54702a8aa0271aada9c7da06cedb4b4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "9a41db03e5aa5684baba2edb6cedcb6e6e2a124ec5d40ba6518a2976618be24c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "42c237e58f44e4a4cc3897c4d13dbcee703ae3668fd517d3bd9ef981d0906fa6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "0b24776fae9bd5dd2217c8d4645a8dd366cf16c6ca2c8f6d20b98ac64abb5490"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "fa871113540a49152e7d773d4f5b1df02438f59c88dc36c5a30ef948ca14ea25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "0089906c6f6e9035b270a2b105a301fc87a83ae87d9ff7baf7700fe4dde7f661"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "dc45f0a44bf38c8e3573801d2c091c3a994d52bc237c6515308114d0cbd7dbd4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "54b835006c57747cb91bd2801e12c42134e7c92df1d4d0a1f64560ebca5c7170"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "09baac55fe7b586eea211b4f4a0430fe259f6f7eed35d4821553f73baa87db7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "cd215c7a7ffc7f60b44e105b9fd51e79d4a2f64c31abae690f4ac4510659a6a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "996035815de3bad2e60135d68e74db2baaf61cd6862471b3c7374b3227db63a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "08d18741428389e94ac7fc778d142e68f1af63b53d058a20be6169e7d1f8b1a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "e70ef5747c1df69b17fdfbe032b3d4f41b0118431ca9a1194638db4a379e13cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "823faba89e0871dc4dab57df5db41a33dff5eb53f5160dad158ac171939e137e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "a76ddaca1eda47356325323cb573b78aed692aaa3741ec05aefb92b75377fb1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "384ab9bbaadec16f0c039eccd8abab2d2219c2278c1290ec2f0cd94bfe87d6d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "e663d555c2637c17c30c93e9b5a2d2f31a3bd380479a4240cd38131162e31fc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "a9dfd72f9539f737913a7ca2507c9f9a0dee54cb789ef9dabde7de78091804fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "f2226e3b72539aacba0e751ab8ae07b668d1a6e82d85ab2101c078d3e1b4e500"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "f462db2109cb204306fa376f27ab66a61ceb1286afebec2bbcff9d9d61d0a4d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 163056, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "a75a1fe648509bd01d5a6cb0518712c80a112dbb25858dfc5134e3df184a5e07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "e70fc9e752303a42e80c69587ae4ecc88b4e3c5433e02480ca9a3c50d68d67dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157424, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "f9501184657e21943ff6ca6b56fc8452d58a81cece5785ba4582e7632a1c8eed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "6b58ceff9adb9a433168f952fc06c605533665c6ee304def79fe5a544c6be4eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "607247986d3001ae522f46c1fbad7af1a5108b364015adc3b7383898696b2b7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "ef9efd13f70358dfd2f3906a88250d74f7f9e2a1c10a11391e133b07dcf3d708"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "f772b2a8fbe17c71f4548c20f92be94053901d7949be619dae2c7a1f99b3702f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "312141b42a091434b469e49697f35dd9cfeed5eb94f38b5c739900c9b056f9d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "e027df085dd1a8c348c440f0a6494ad387832d8c46bab641887a7c6ba1023abf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "d72d81d3c1c2579d4327554a7ac2d56437be84f15728d74089d0b4d9984f23b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "63103d46b893d5af61d5536e2e5ee569adfdf5f6259f9c1f31bff86e0367a303"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "5e70a752be8ad34ac0313226c647b8f9b32d4fa78a56e5c3a0a8518ca57fb3ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "c71545953cf06a2f739dfbe7ba47c2dcd6f0849db00c8b6ebc93e783955460a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "30cbe91eee30e315d97cfcad1b4c17da0ea140c8f1a3cff504868429b9275de1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "116962d6020dbe5cbf249157c8c3bf2b2f456bfb4f87c0bb74008152fd324847"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "e5755fe28aa795c077ad8ffda7605152d779273887e876bd35b5d1c1135dddd4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "7bdc4bcd7b098db253dfd892f33bb64ae83ce48b73554ec4f6cf553daa33ac33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "d510474499da594324b0710f6cb9da5e46739a007197c88c6d06684c12ed3417"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "5be89f01749b778b1325d23d32370c493218b7db8efa6ad0217278197ecb3fcb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "2b0e32d0702232306970f9f3c3969f74f2ff7a6cbea76e3b151318b63a6d9c72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "73c889887565631b90a2603e36cc9652def50b6d9acd4c00d62d0e0821de641f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "0778d57855b11a6dc786e1762e56a0eb744afd864d8299eebcc3a0790c55211e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "1e8ef98b6ba39b099c69097e94420175679494bb4d6632d359ee16e305cea8fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "1cb63599f86896af1f74cdcdcbaa65c55aef94780d225079ed032b2b016638fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "1e86661e842d2543fada2133fb67580d5022f3c524ea58a0c9fad147440f9715"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "7177242ff5f75d9cca893e86eff6c66158c2de0805a7c47af5d7f092b75f747d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "c2de2e995dd3df8b6ad7206cc69954a39441970e73526ff7874e60ac2f445c91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "a849cdb2c507f56d00540f0c03bc250e095e72c1371dc0dc55266aeca41fd37f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "ff1b100554f2bc5ac34d19b3d8a75fc1bb75c3d0c1be8adb98022ebeca37f0a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "b53a81addeb4ec0d305d539e85b1d0bdf6fda3669cef4b4b87af3074378b3fe4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "8ac160fd660fa7d3a656e42469bfdec6761bc1793de4b617eccda9c957ad38f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "711fdff899fc1dd8a0419d278f3b8db4230c4b38f616623cf846e63d82d203ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "59a8105acff7d60b799d3cda2a4d78956807b0aa81036a00dba27d0237c91316"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "935e331d6314ddd82cc94e4b9fff516f2070a73d2ec6789a271160a8f7dc9c37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "bcf4e78ce7b08c75bcd3f602a6a73b0e6d5e808940951be6ba6ed6b6107da4c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "54b31f1b823b10f42cff2aa87a698de18e7e68895f339857257361ac61ba39d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "bc25996aaaff46e1370f10cc26484b22cc45559807255f2bd5b26dee29f45fb7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "0eda38204b9e003505fdd7bd5f6e12408378a7ac7d10579de133f64c3fb1a3ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "c52ec4cea19c0810cb3848aadad77f4e1ce5721c2568f266e4282aef7f8eeed4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "65ae7774953db9237b114dd170bf94e1f0f04023a9d4c33b05b6b8eb02230865"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "5e4a426b24b6ad6bac3a789712e5a64d667ac173e0bd786d61db8b276060fd5b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "0dbe78aa83143132b479a52b574dfaee23ae9cf86ad1510e75e49f075217e131"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "e70c8fd21e00dd994b2c050a7e2e4cdbcf60229077844f3bf57cefd1cfc11d3d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "e5df96b5c6c248c3229fc8d958dc7681209760537f371eddfd7640773675a15a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "00b1ac6b578f4ec68da2c556307e90c316a39dc2cf373957ee7a86f317ebb994"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "78fe509ec66eebbdbe7724d102b12508e08dfa365ad505c8043a25d14b620211"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "dff1f10eaef0d35711b22b089e1b8921a6a36f96d0e0cbf7a8ef7b32ac2f7cf0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "969e1875edd216b79e34f642f8ef379e0a2f06102ad6e1214b0e22c88d6d0bd7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "7fff62e37ad88013a024b5c9b35b9fa8f7729095b42e5f0d86f133b55afb9492"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "179744e0c389acd1beef894eaaafe586dbb3a12818e2f94b17ef5653d2539843"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "6f3f989183e693c2d55289550ab8ee66d0e5df606714de73a1d551ca6966af6f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "19567adab4bad103c87c2bd23a95c0b61e60006e4fe3c5e6bbeefbc0cf01c8ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "c2fdf0ef370368268cd6ef116b9665d7ce0cb9f6ad3fa91c7d8bec8d23eb868f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "e1d168043d96adff4dfea878ddf1b3cb4dff3364a58f288cd5ae1d7680b8b967"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "4c69b001ab9f07b5f6337d7f9ef226b80100aa8899a809c23bfcd0131ae87e12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "75eefdb7620d35d973405fbf703d3c0623cddd388d80b41599f7ec40cd7490b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "b0768107ec3d5ec67ef1f7d06585ca9f741af1029124cb29a80b22a3cd13e660"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "4cbf4d91338085d0968835a220abbf12cfa6843ab1f5bbcb9c4c12ba1a536b2a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "915aeb6c0a218ebe9ede0aa3616686ad3f89b03ef4b4e5e3dc89bc968e5b5375"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "e42e2d14d4a937f1de7f5f452bc6bf78d6a23edca49051ce555d4099da05e514"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "94ba5fcad61d26fdacb8243377c79cfdb70768ecad2fecbf704a7950db7c78ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "e5163d978ff00014b39f648e628e1636208125d0a57838faaa7f23eb6d3f5135"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "b761a0bb11b436ea4b50f178aae1e8a97b9b3d1174998f2c0bc343e49e80be4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "6183e23a4737d2813e4944c11d1b58c82e791be3a15bf3b9011a6403ee73fb23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "4f58a9fb815bf7f7a5bdd2bfc14315727ef1605de05064f8cbe4c2f2a2096721"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "915a104e0ed79746f710ebb63013f7d336dac6a654d00b27b3b206d25c39817b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "330bc26db5710de1ac0bc2c5d7477feda05c4671f203e4a5e7aa141e59e9f99d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "db2444a98a4e678a34d14eac9fa7fb93ed3dc20a318bb7de937df83321851a80"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "8e254a9927282246bffd1742c4d6ccaa8a64ae95bd6ed32ff702cb0811343b68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "7c3fb437f72e2c634c57adb7c90b15803a8cab814d8182c1a9a5203a983184b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "7d3330d29dcdd1da427f44d90f1b5427e94c0e6353422380d09adc4cc1ce974c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "727cfb29fbc4d43899eb3a10f1a4005d501de49e23d8620e552e0646dd118756"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "e31737667d48536dfdfa4548cb676c9936c4826c504c6482c50fef05ab6f7218"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "98748f1e1feded99fa9682a89343eda5d4a1b2c4ce064b529b3a46956b11604a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "298cbe0987774921b4f391053d8f4ffe45624d58fec8e6619d80e299331e2b66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "853c797deede143a3790b55dc71cd4880fecd9e3597e6e2f2f01651cfae3eeb6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "36422501c0a648b2aeabcce2ac820deba2a555858e69676a5fd0fe1b48239b8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "bd732c8410d4d8ff99d68321aa53fa17850fbde3d94be008f0cf6033ab617124"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "2f60b62b73b7c6888ae04678a3776060e3d8fdd7152587c17b55fe930dc97744"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "7be8665bc75992df44d9f178554be11af9f9bed23daa50a92d611744b4f35ffa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "46cf31ec1816fee4fecabcc55432f764b33f3f9a37358847dee429fe08d98bcd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "8cd230ffced80716c018516d1afd2fb0b33560c7017fa8993e586afaf0c80d8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "b1fe8ff37dd1e52a2e468065e5e8760b42d0de0ae95d6b9c18ef831dd1198cd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "45b86c3c22a3a8b35b126a74de32a8eb7cb4ddec9c798ac8e9f62929abf289fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "3d0ad295461c110c7753e36b8f4f9f8a6ccf7035a08e5ca733a0eaac9e2f9da7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "9a4b9963a9f73466c07dfcf4f98f3d925f4da8825b889e18e15df60db2cba6d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "d83b8c91148a02217633e60653b51cc779dfc5905711cf192658f07e9b903a9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "7cd465d211619a07b4e772d3559662ecc579a7a9ccd323ddc57b9052c5fc7288"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "14f4fc7070be6305c5b029c38beeca61474921a62e3e27c19bf0f4632d586dab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "e2988dbeb85b15938e40134f0ed644d4bcff50cc6f5f9aef379184c8698a7894"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "bf3607e4bbf93595a0806d819475063b1312a1342fcbe4fa9d4e74d0df16c08f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "da42aee7250d117f62b4a8a2b41595dd6aa53fcd068ddb6a7e6452c067da4a94"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "78719cd6db98c0d41b78ffa8a70631d517bfaa5cd00aa1afcd91cc758a4c712f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "e2a810120a1faff0b5c13858a6262be41e293c0cb01c88b69a33ebab4b4c1917"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "f5617fb04437194caadd0f85fee768adcf0622508b0042781a23fef21c7faee9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "d8425d17d8d79ed64eaebb9877807447db3762d2da0feccf9b1c67b3a5523e45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "61597c4ecb263c837a0cf6e60a2cb9f87cb3eb3a4f530ed158b8f420177c0e93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "a1c4d71b4744f3e9d7c8bdfb9ee3781afe72b91b833f7f4563cd059794542277"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "b7935676d5e4007dec07586ea628ddb2add902e73dac224ef7605237446c2f6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "cb15c1a2fb3dab2c5df7574488f13c26be28113026e81bbb73d37bc46aa101e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "485023afc62f20ae4c5982b372500d0a85847552c5a3fa43864a42dcd38cdb92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "c3c7a0ed7eeee3d8be4d59854ebbfec964ded4c7774cb315a2cf6495bca6bb9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "b39d63b981d9288d1ee835e0ebbcd5f06d4bf9f02d252ef7c9e978099ea6def2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "e2382b244278acf32cee0d8eafc95c3f6ef3891a5b017b4c7ec7e3fc64eff557"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "a0719ff06bc9088bbb6266d3e7c0c85714dd69769e81957143d725ec699d4e21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "56c361fbd5935ec7453cbeac203b887089b27d6799752f78b70cc0d4dcb84559"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "3f78a158ba2814e280cb4d720bdabf58b7b9f8022386fea757dcbfe8d1637beb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "5a1c8d87220ccd8a664c2c24e33c323ea5db921482a32d4e6c11263d2fe2e758"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 159984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "86e5106c2e8efda1a0db6bf5859689bc822d588cf636cf545977289490d8df8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "a7d01f89d10761ec2cade957b3067665da67be0d0a3691e176a12e0cec219a8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155888, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "677db00620fb23656c4ba2b2e8737899652826704261f3dc67a9bf18cbc80874"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "ed279c48fb5e445e9b4980e0b836a31b199e9f8c8091ca381cec72697d263fe3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "7760f12881247f32320c5a79ade3830ca419d008c7228fb6a19a74b55628bb43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "f319c0e4598b798f83101b6a47024c6ded2008672944fed5194cfdeb6158250c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "bb7cf7212bd0284d4dfd49c6f76f4ac85e86ff3233e8a5548b379886a1ffbee5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "1b79e8ad32ef6c5a2da514bc03520b25d184fef8d5d3952c4b1c381c21e52093"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "5c6423fdf617b524868c3b30e8962aee3b1d5f1d1a43a8153a7a0ba9fadc624b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "ac34222e1e552a1c96ea8d80bdecc9440d5c71b8709119962186b5c7665fd3dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f8bbd444ac42add0dc19617d86c9fc3802ee3df8737a7b5a872fd3417a576c33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "4f73c8a8180a647aa64d6e3424c5db19a71890cd9ec2ee64543d1d2dcd9704a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "cd51a7942ceb4d69aa63ba79f0304cf782fbce4e1ca987587ba672fea9eef69a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "7b53f0cfc4f3f8560f56065e13c44d55ba2dce04945e0840075b7b339bb248b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "3dc6f4067a06c906df7805f96de29de1ff11f3f545aa06acea6d51ccb805562a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "4b4fc65d49fc3e4a71fdd18c57c21310ce96b1adae7ec06c00e83f87f5176619"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "2a7d0cf57f8cfb247cc661d382bb92b029ac892bf8a2cbe4caf4ebac82a6b460"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "f5b1ba1d27404bcb2079ff05af861490ba495d73044375f14998eb284e800151"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "47353d8e436dd9073dfca33aefebf01577f5a736005846dafd60b2a9b65ef7ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "e45d7d27770425ead6f36780372b75802377740874e431304d8a42b7f7741e22"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "afa0fd4bfc9c10c781a73a7b4435854ec2b383eddee6e90d27425e1d094fc866"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "363163ea6948be9ff05705a8653cc98267af32790320cbc20c3dd5cacb176192"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "fb7fb5b9693a6253a373f283f81d9da7ced033cd44b7ba501b35dabfec197356"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "d2ef79f18511069b585c7694129cc922991eab16eb5ff0ff8f31034ecc25e663"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "8d4aa93af4829d2cf6d11b9a839f726247e3c8324e8fb9e3b7cb0a626c8c9bd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "72f4e2d187c8646a29a58da345dab182fc23ce7832a2f5c7182646b0e26be7c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "c3aee3bf16134e541a479746e05559c0d25390e8078d55484d0536512ed7d16c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "ae367a6adc1d63679d9707df2bdab4ea20fd9d211c06c62bb6835702fc3fa424"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "e3df0e510aa37d026386b44a7eb3be3446de0862936d8428a366055a824e09c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "facad9b9d85d41b06fdc02151fd07f62835e303fc7fa7b645b9ead1ccd700e8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "495dcbd56051863dc49695c2d3dbf79244b818020128d3efdc27af5d7ef19f25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e566a77fd4edbbb8eb47dfed2074b5eedbab59266b70a6395fe343faad00525c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "afde0deec1dc0091fd6416cae2aba710dfa3aab333062fcdbe0321c2a322913d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "d13536768154481b9ea82027310bef4b762446ad86eeaee23ca8be3857eddc07"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "1dfb40cb95e0592471433dde1f4906f2bc462b500b4db8a050e2e931eb71da24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "1b39c939b411d11931c4c6d84e76bff3d351235031ecf2163e4432f28162f29b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "cb2004ccf068a11f121fd860e3d63b4ff231ad02ba79cce4b33eee7f3db7ee9b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "e99bdc58ebcedb2533e75e4d3d148f599135a5cc8416643308a725662563ed76"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "4529f39467d717d35cf8b45510a94e9eeedda53746dd2b268f5ac6e81026da11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "d23d171df5df88f17fd1d2a11e5eae0c82dc6798d845bcd7830fd86889f70955"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "98b3a2399ebe969d3f2d6a3eca42f19d302d63a21d5343874c18c00261ed3438"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "00bbe754d6f42e02a87867201ce6e92886f3ca5d089a7f071aef93f6ab3f9a3d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "003c3886ae6fd9fa2ad3ee236963822620b83acac673fbef33c0c288d3fb12b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "c09499de8965e0bb688222fa1b5e003dbe6e4a2d65cea4811fe219a72cdd0854"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "c2af74880f9d0b9449c1f5939634f5c32d421e87da5b94018c7c0d69b43fac8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "2864e6735ca45efe6faed64e608eea274b04c454f7360cf9a35b2009bf388634"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "891a9b0dec88dbd2781035a8e0bedcb86e5e4c17e54d0174d75d3536d9398afb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "292f049278ea7b48cd21b10ba020e3341e4f7cde20d69288ed61e4b83dd0190b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "eed18904111cd495ef88b0f86921af59b6cdc357f74175befe1cbb4ab12cc06b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "91f7fa1159bafa8eaeade4b0e870f5dac4045eb2ffc6e3d824ced02158c34aa7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "11af54296471424c2ee620c652e833e00e90ac42f347c0836f74e0db3d4fd111"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "e68d0cb5cf384ca47e6d9cfadcfbc7dffac275c90cf49de1bbbed2073872e9de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "9cc9b2180f9771f0f97239d67a5785dce53b4f9e405ee89bb361349acf8661a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "176d924bbd864326dae9b20498ecea0badb634f31887a8fd0e248e0f0c57cc50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "ffe956ec4dc495385c28b66fa26d7cf2ef84b924eead647f0a702361c7edaa7c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "ac6474b1be3b391834fbafa187efbc2bd27f7aab781574b6365599a8a370f47e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "7654d235a1825ec29844353105f06b93f26e924eb280e4132fd4b581a53eece8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "1a5d6220cc402acd0fa9d0e0c5ceeeab77e55d390a20387d85b1e2f79da47e2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "6364db5c84453c0089758d5ff98c62e533771f547aa97c23e8b2b562c398bc0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 158864, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "46b2480f6e6e3623cabbddba07f253c90db1b22fc91b94534839918e03249eb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 158448, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "a7ba8fe4aa7ce5e6e76381f7700bdae9e0a576ba8ac330bc0cbc9bd82ffbb165"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 155280, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "94e292819b4b22d3aa6d9b787e945ac3bed4341fa080105acf72b716ef4c993f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "1d3a3e3b49d3236eb19b5df94b58335833e7a0b2abbb573481a078ccd3ccc96e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "83a5aafd34aa8bf7d504967a63caaa3dd5b7b009604ba3d350435994762f46a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "54f8455ac82e7684636205c04bddd76fed26cff44dca02d70491030cc40fba8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "4ff846d96d0f612c3596211aa2397d458516a1b746daf174e74519597da6f472"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 216288, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "72ac3ac133fb58dfd923a6e3a218014a149d4122457e56d7c6d59a5944fcd8f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 216192, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "41b4897d3021c2fb874ce509610d08fb16b87b752ef00b523802e11a5c94407d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "1ee1edc734d8c3f46e8555b3497cfe51262d46044d1f9a81a82addc2fb030d3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "03e532a3fcb25e8807819cac213aaef4a981914ca3ef75becfcb4005bd374930"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "6b5c5c8a08592b700d906e7151d393e5399d0548d0e226b02b361a0fe503a35d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "4ec0c25111a1e1050a57281a100a89627291ec747faf18e61b66d5beb7468706"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "de96916d43a9d67fd014d8fbc68d04cf231cfc48deaaf85f845742c9f88e8206"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "301d9e5e1662f5b6d20edce2b78566fac0ab51f9b6f78fccae87002020cf1dcd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "3c56844c701ceb492116ecd4b0673ee67efd9d54e438ee25fc3513198f1ee7be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "18abbb0d54f37e33d131dd679fae659ec4d4ecbbe425f3e07b2867ee23db6a21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "7b2bf636779fe19c589991a7efc54f4fc68532b0c6614698612e617754af667f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "99d37ee0e066ce73350ddb5b8e762f9f8527545119b77bf9dadea933f50a19b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "d5bd5fdebed63240924b965779d0666563335b686b92dd0a26a132f525b1938d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "ed4557c880599e126001df19886da367cfa3c04966a26b7cbc7e3df34a9aaf78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "b7e013c7c9e94d1290f8f0ed96d49c642abaddd1b1a00a7e5986dd312b0c64b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "e80e10d31f2291923ff524471469060a673d96d1e997773e444c32442a6c5a9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "efae860a398a24019e78a41cdafa26065cfece0325640e1331c31cdd9c057eaa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "1b41d845f3c6aa10ec7309b131b874cab9f4e57cae9a8f5b137fbd1d2a9d5174"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "1fef7a031198e624bc3e4362d7f2092470c108bb26a621856a293d95e339c59e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "9ad4677bd289f51346e454c597cc8c7a510750a4d2c33d66ca29fc19e08b44e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "5d6ac5354f4286c8b33d4674f6f0442eb2d130b46d69e388626707228292e893"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "0f4f86d13c47be56c921563a5b132932d0fc7a9abcd346df569462cb2e7e071e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "ef6f94be08f8a0deee8a1540d772aba6b7605d7164441536b5bd45ad0a139fb2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "b55bf8353275aeb820a6a90e1b5511ff0c3cb8d6839c61cc8cc02f76b1f2740e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "b88bdfcdc69dc7a4f8ed5db74087db1cd67dadb7ade35cabbb112bd7552287aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "1e8abad5389747d978d8eb9ca476de3d3ad570e97b0bc65d4bc4bf0eea5e0379"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "95c1a8bc0438e72428125e03191947351eeae4140bea21fe63eed26b9edf85e0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "3b2eb3dbc4e5e3baa10e7c1865a5ccb409bc25191803cb45aeed09d9e10128e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "41c1371edf080eb0d23548627f8bd11fddb2d9ab25f1cd9752056e1797f2d97d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "65f4ba2917da41d8493ef166b64f6c7d06869329c1c160cbb0faa74fa5f19994"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "43d108dffb8d4141a85c021a8e2e3720a7908da1928b2823f136e0b5d675e87e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 217088, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "6fcb566ba7e64960d22773328f31b8a9d66fee727b20f9889cea19eb842d98a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "e93d4464f4a47132af3432ae5b5ded26e2e83d02183bcde402d3bea4f60e27df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "687b4eb7b727080dbd98932a6322ed4b773db2d59537df3e3981c0240124ef0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "5c8fb56127fb7eac3eb75e7fcbdf64c51c4b50bf907f0465abbecb8c6b2040b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "18d4412496dd7102f75ab94a56c8b36d826714df9caa687651d9d4833eb91735"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194256, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "995c83bac965d44f4fe0368c04299cd2ae2359e59ef4edf92054ab7bc3ffd1d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189648, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "f1e941ac7661a3385e08cc630b067f1151ba9962a8001ba3dd0835ff95f1d6bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "7eeeffa763182c6603e1287c6b94991f2d1c29a41960f78e8e9b97f9f7dec651"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "13a1bf68bc1b0ec29fcbbab6ec57e8f4824f8e2af4e98c4063b168843732adc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 217184, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "1de6f2b2eeabdae81d085dcf4a65561a3a00940fb56a72ef18c054ac7d3da9ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 217088, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "abf509b2061e3a67a4b9253f68a17ca53135871585482155b58ad4296272000c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169200, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "9cd567e9aba4851dbed2355321de12d07eba6b1a661a55751a7d59d449876d65"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 160912, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "4433b7aba22effa1e6055c186c45feff386ccc07d2c7eb2f79a7613fae609345"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 160496, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "0251b0baac894fbf5d83e34d7f0a281f74fbb454a5b71170593f7dccc6f2577f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 156304, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "11c760182fff00c2fff8a9d7aee597261c2cc92058635827c24836955ad604ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "a9984b0d041cb6d5cdcb7b98ef4a0f2fb647048d91e303afe8cc20ed6883ca08"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "eebe034cce670505a54a60de74e3c6a930cfcc6d45ce1bcea7bae084d6bc4fcc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "ba9402d0528be18f1d65781df90eea654e6f10ddf877bef65131d0e0b65a1a9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "586b71e29758d29ac82d36c8b8091d09edfbdb2afc4feec59e61c44955ce28e0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 44272, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "e1f9af9f213c93aed2af9b1ad7b259186ebcf586781acb1fe24c2c8340912dd5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 44176, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "253bebf5f9126f29d177f46d53cc9f3871a727f748cd855cafcfaa2e18875e86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "7c2d14709727d8a9998aef474bafd7979a5471b2730d2401b42fb50fb9c033c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "bd95701b55aec550cb9cf5c35a9be6a7c19b8e4a20530805b0250a8d2e9fcaf3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "e84f451edd3866c5f8bc4c9f5a60ae49f5238e3860599d2b1941cb74a7ba259a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "075adf446c95e676b46c1889f042ebdc3882a3305f1e05feb93111c7d5245fd1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "96079b8f2da1162481369c98d201d2f7758306b3efb4c9f2340d43fc317b199a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "8ecc99ff55d50c2160b50084aae41f3a9d4e3ca2acd0705668f9841df398bb4a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "7ecd8c57c4e30df7b1f308a36327c414bee5b6065ba26657c6c3391792a3e48d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "d8b386c449651c2b5be8d1473b781e55e3a51db27871e4cf60f22b681ab10831"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 194640, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "03eed8cefd5ab14b40c647926b4d086109e1b4e6d5191ddfbf7346a47758fecf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 159776, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "ad2ac76412025ae9334dfd038bc5ede91c4884bf46bbe62dc05e1f782a71a049"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 168048, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "2b8cc023bba2994be5b58550fda06830d9b16b85c58ee1b9d04f9ecb0fa4fb45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 159760, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "ebe8d799e77f5d3bde1895afba6b7a5f91e9c1622cfa84feeeaf089b20cdfbfc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "4b82ba0fd68731f5882f356907b6f70dee74be600895f70e44bb916617fdd4a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "048dd566732b1fb8ca59deea96cf97fdde7ce7f1d308e006425619f2657eceba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "31cebc15e91e719b6a1c6320c2d6d41b1e80ab7b06b20a3e45138b4634e171db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "4cf6675f20e7ad5b3eee83fc21e5dfe56f7a1fca137bc1bde824dc218dd1a24d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "6ceb78d0fc68723b14e6d1a7ed4483275f8fbcd760eddd45cef985f352abcadd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "f0f438f7f08b8ddbe9d9e84780298db8d3d8ba2e1f66cbd9f48dd8ac3f8c8638"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "17f669af1d5ca660603d4947c8c96ccccf1b705c5a8c552c2aa17af121272142"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "b4767e16981fec3ef53ea00df2b6b43391129a70a296a42591e6a09a65d5eb2a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "46c686fe2f8f1d763fbd6d029d1c81dc4ea6f8285816078c5ac441529df94c0e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "836142217d8c91c78ce884925c2183a1610a56685d1793f904caf56985136123"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "de1b43acc3c36115f8c580c0befe14f75b6f70beecd48c0f1eeb7f9a2426c963"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "28e9931ee772fddeaac6a36be01c3357cdc02cf561a406a1348a78e67a19dc47"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "0382e270228c9f5b2afae1facf22f7fdb7c5b442edfb4a0c546635296dede386"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "f49f42443b3b2f29f6a33c5c04cd2152f6dc5af47c54618f533dee6ce15d798b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "493709fc89effd7677da0da93bb7905dc280412128e5bd53a21dc141e2821867"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "1159b127ebe57e4671dc129e343135c8843b64b956192dd3c94395ca919ec6d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "f4940937b3d409e91bdbe431014c9e36231f27e4e65541fc9ec1c95dad75edc9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "7ba38e55c9bf2aba307b3c366d7158aec0e33b76b0fc3ac6042a5066ccfa2fcf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "24bfa08e9467d0e3e71baa4167a63778146ec7b97300c16a51546dce10f0cde3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "dfd3f851d4b64402cfecb351b3636abb90165893162d36e700ca5432b3ca9c27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "f0a2dc5320ef2ae84dcddfa45abcc896154b8fd458240f1cd17f21c67b96973b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "9f5075511c6bac8e907cc85b7934b6525a30543581ab5b76f1c4bbe403b768ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "28d366c7023e798318ca6656111747ea52a7a5119b93211eff43b8141297f6d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "063cd88ca99d9eae43618931e5c024f637d3adb67c8381728a72a056aae6c44e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "2fd8850e043f37aaf8904c1c4cc652e732bd2b1f505d69182c8ddae6a61de7f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 45072, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "d400e3e19dddf050bf2c0ae0fd73a1eec168a734b13d2c602802b76c2a26ff24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "c3e240ead3e2c426d1696c2684e908302e635371277c452ebecf185591548367"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "e2a2df8c420e00915ae866c9c58754bcd9d693b251b821dd9d2e7193f7d053ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "69534e32a53819dfabf0d5479f22019f47087ff20242466190100d4d8bcea8e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "e6fcaa0a0d420370065f111036f49486f7f78e394e6ac4cbb8492757a88b7131"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192720, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "f19089bb822dc31e9cb2ef64e77be6a4667dfd41fb0ed20e169115d73f4deb3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189136, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "34e785db166a788689c064fb67439526a748b494b1326281d3472468687574f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "6d59cb35fe93819edb88e02c1973a602ab45c2b5045d5cb519d84f78698bbdf4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "fb1336d01ed2c3477cea3dfe67f39366505b0f8ea5e96c01a1fa1c6252c8c15b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 45168, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "6398732080f600c7893bdc0f7209348fc5904410fd280f95628bad32f3467b0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 45072, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "efd31a82429da38e956014c69e89e017da6fd28a2e70e19af48480456b50edae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161008, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "fd0731bdad2dc3c0ba64ec9bf791bfe5fd0b6313dbe4a99995abccb52d70a707"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157840, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "f6b428be6e0801aae922b8c240f38e16fa0aec8de90c2e313747aee0c4753e1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156400, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "96780537fcadd4d7e3faf0b467f8c94374bd38a24fa223adbcd8fc5b0d68c8e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154256, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "07f780ff916ba17f052deb93fad7fa700dcf4a5006b09c7ce0e26926fd0a9361"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "d863588e5bf8c0b2e8e6ccf386d473ec695e4b1e3772900af72e13df1d5f127a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "1d53a6ecd65ac0617aeff735e88b57241e83ccbe4e9adc9ee9e5144feeb593e1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "2cf31f7707c6c5acf6af02707651b2feb542c4c28e12fd1a25e1a05760a90991"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "381f4631006c12b09e046c2e93fa0158936b7843a59d878998f6524f057b5b47"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 167184, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "f9229d6ab6decec90511aab618bd5d63ee9cb9aa328fbfed3807578de821ddb9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 167088, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "630fd49f5361597590c1b49846162fb43b9818e1b2156678b24fb5cfa414a684"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f991c01bcec27f5430ad14fb74fdcd55e9b927b01d84f0972cc22b61000ed767"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "67ba68204ed1c22795a87f3dca83a8645fda9668c7d4bfbcbb0d893f992ce50a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "6e21737ba71ac949f0c33f55e479da1f2e328ae4723be144b174c88b245ce830"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "33c750ae146e721ac144707d3586f95761a17c08196dadb6b53e88f16909c5aa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "3e8459da53ca83005a51f82ae8d1aac2371bc0bea1e2d63bff52100f78f7701f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "30aa0ca4537c07bfdfe19ce4007a587cba107211ebfbc56aee5d920131fdcde9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "dc6be90586c1da3f682f5408c9587a2d5475b10dcda3ab8068a7b6d732583b7d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "ad783feb33eb9580d311bcea0eef7d5afcdf741f2652c572c7f35922a5f55bb1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 231376, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "4cf9062bf5c0c03b49c1c8ea674473f074a68e06ac1ef590ad1b274cd8946c87"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "ddcd1b2597e4ddf874f81639be66f2070f0f298145414ddbf30119a913231869"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 200816, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "b6af82bca76abb62a1698c4b11062d435f7dfe3a3535cd3d727ccdd33a4f098c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "0cecfbd8220a14b23af24aaad4fa0955eb54c1a02ac59934023f7ef53ae05676"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "8089c51a64cd5d1b18cadae85a28db4df4a3814469af46de49c878fa0e368915"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "f71978e884b6457da396d525c611a589ea651d7f2ff2915b8197f02dd4ffc281"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "b8e5edf52f8fa6c9d730b1d4330603a49b973b6f33e8d42ba5177f0ab5d656bb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "b29329c6091856a7d6e4c8cc32a01e0360062126bd120305409addd852f69f8e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "f195337660341f7104d3d01c442f454071c7c3f96ac0a2d8e3003ea8a193c7e4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "fc8ff37ff49a2326d90d56b3e82d8fc3a413450e62e25a795dedbe14d20bb723"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "d2bc35e69ab3028894a04b3c421821a5b00f66ac0005fba2377825da375fedae"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "62414f797468ab4651952786aa43522426a068936e71fc53a447a5d68b05243f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "1196003500316b85f09d2d95575c819a91d9fe54d6dbf40837ae9ba28fc27501"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "7e8220227cdd263a384fa364c1116fa0d810c9673ea4690d24bdae11946466e1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "4122cdab63959f0ab00da31c6344927edb39cf856fc9e997378a7fe8a58b74dd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "ce285d401e03154f511f8c49f8b733b94115cdf649703e8708a8a69f88d502ef"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "d24a26370b91d152447b9b0924fe82b7a09a7eed386305d9e3b12ef1cd39ce2c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "31aa74824a5caa73b4041044f70633f910babbfd2dd1edc23449c0658c6eb612"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "c8ec8165d7bf07aa20e8ce6eaa48911acd49f7ad20d4f9d97ecba96ae23d338f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "abc74d4b6a4b938403be7c96e957f15dfff2d41c069f79ed54508a8b20dea5f2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "059659bc2033d85fe4ce13fb7b29d68ff4c47c1139f4a20ed1e2efe36bbbcdd5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "db83fcc8c7d8213a9d672c6067c4fc51585a852aefeb91210d694016bfa51ae4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "6c04eb2ee50ee28481ec0b66aeacae57164d5ec62653a12769e3215c2dee50a5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "0dd971eff139382a201cea83c36f4a728b7c38a9989373495d64644c2625368e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "03b580ea5315ba0c62feb10560c858b25fcb4bd1d7404713a0c9f5aae719bdf9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "533121a49d0c3f36b255496015c87d5d9946f37d055fe40e10372c5e2bc21571"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "65d12a761706a556bdcba857db8f1cda6599ae07c4766c7269ffa0044be22d78"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "d20b3320be911e5588d5cc06b9328ec17afb1ca7155e234f9b2fd39e8b8f3f5c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "017f47a5a12d29e49da1bd154f732a0a2a0f99b0b318ccb38ccbc7af782d0b1f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 167984, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "83ae4f2cd50b0b7d4caa8a6c632b45d5b39bcd4d6481bdc76befc44330f3d5e5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "0f69efb8e26500455f87911fff5acd842891fc94ede9391f97a3efba9e2b94e1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "dbc9f8091cda6105afafe8a3b73af885bd60c1da705b498c88d82730290a53e0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "af474d301ce37e27be3a5ae8fa2b846bcef057c7956afccf145af99491e32db3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "7d445a9a5cac8bddbab8e143a4aabf3a687ae1c68e40eccfdf70521b472588b8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "9af16900bf391bc5610beedcb4c30623cb209ade5319a46d0c9b0294ce48ea9e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 175824, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "b5a14b797bd5aad47f3d728aa33d10fa90f47b0c5f4f102ef34abc803ccc2f1a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "73029deb3f08aafa008f18bcd2822e9a4a1cc9445a6247988644b5aaa70d938e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "3b89de2e0026222280c888ac67bbfd5ed56ba51b1af4490d57eb89d19a7d4ebe"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 168080, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "a6b15ed67509a7886dc7bb122030c4948cde7d9beb4ef547b33fd1dcd93b3dd0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 167984, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "2db83d528314ceb693c4a735722120ab85cdbcb922961e534e5df9e6d5508f08"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 156912, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "73fde21ee2209e71f50ba7753a76e4a662c3f12bd3e9d15e6ebf144ffe6d0d6c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 148624, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "d4915a85f44da660b3ca3f873ada59badd7af02edb5a177876a8d2275c015a20"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 146160, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "159363cffa46098e343d0136521c98dea3280270efb9effd3699d5cea7fc049e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 141968, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "8b025b10ba6125fe8b71f45a0826938754b9e21b8da828261d85f87101e50d68"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, "ddb3289fd26312f93f03e488c728b4fb1168e06365d641a13cb993fbc7acc196"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, "ea5b869ca6ec731d2a146a60064f4882d752c2fb38e9be29489ffa60e0566016"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, "7c2dd61a7b7b7dc50229821f24b0f8a45e0e98875af06716fe32baf3507f2254"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, "47ae136f6ead7c3d0d0aadcf882585633ce7b1ee3884e258ae691548e35d75fa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 199904, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, "9d3ab4d6ef65ca4fa0a89b6a806f790dc1f86b163cca8acac0e247b5bf52db41"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 199808, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, "c26c2635e319f7b1fd42cd8debdcfef38fdcbe0dcc524f503087330f6574bb73"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, "92547af1c3a79fb770cbb46c8c89dc195eed664a7fba57d0087d86f73c755315"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, "4a74a72600c3f07b6a5ba1b0dcab6f9aa4cda5f2975386911b2da0f078953e9c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, "45dfed586495d23e2d877779776d5391fd85f08b2cbd5a0e339a2e911c4569e9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, "0eb8bef96ee061c24893a55b5589a4d6251e167d9eb7e82149ce4c94f1b6d415"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "0fedb451950ff1c49ac728cb2106ae1fa7ac0eb6bc6b33452e30454170e41966"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "01df5ed41d2067174bb166a73a77f40c1d2f4a4dd5e64ba94d1056a2fc3f1aec"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "b643f8e79cd50786b5a8fa985cd564e8407febe800a79825aa5be6431d835646"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "c62f03ade8b87529aae3fa1b17e919e726f7f06e9492928bd8e6432984d5a3bd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, "8eb6bc078726d9875ac1c0960406e786738b4a8c360dc81132b5b3a81d48cc75"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, "67054b2a962675365d5a91c8e0d0a6e3146ef66a67b4dfd8d659b212e7ad652c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "7e9c9a0ac1b5e691c329026356243a177f974a8dec3fb137a1486cedce9d4f58"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "cb4ff5dd7397a12d3e47f8141d4746bb05f312d27144352cee2bcfafadf799ce"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "6d60a9448c5a0a2b519b304dfd9aa48c6963dc5f7f7370270c3f147d821d372b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "841427a180659427c6ccb85c5b449c1ef93f71bbf58085bc927423530f5acf05"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "c0bed65493ba225e3906ea2c4f1407c9edd60377a125768b897f0a4d3be75d8a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "624f1d2d324a842c7b01a8a6274f1b8c2003949903f597c6f916087c67078211"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "9f9a8cec0ac13d4131293e663a711f1ead10cc3c6d72bfa7f6748e9e38b54c5d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "93a328745f402f34877517deb19848bc821a9fb3e280c07fa98198234272c3e3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, "01f60014986db99d5b79205a79bf563f3de9e5230bddf6f61b3e90e99c0eef17"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, "aa5f1742e6b270e86492aa660954f1a3a481d916d5246aa3b01c4e2cb585fdc0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "698bb9ca14e3da097a80fa377fe43b3078bb1318fd19deef24aaf2c884026365"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "890865ad1edeef3df2451626545bfb40ebe9231114bd2fa9632df5d9bc952e11"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "c2807f6ce5b3b30ee20586d81c4ec992f81ff39f109e4f96b76b73bc60130677"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "cf2023f3725a1e4419807d9d709b7caab118591990168d1aed7f62496c732dff"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "87cb9fc9b74d77e2e26d6e5b1c198a70e7a88a62197e52860354b071234e5bfe"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "aba21d7c8b389f749315ec7bf69065efa01096fb6c1f2f671d9a225ca972ba7c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "fb3b59763f038db1749ceb788448e7ffa090a976463239de78fc81dfd8895aff"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "34d9466cffc458911f896788004432349178e494022f7f4361a801c518bd3e3e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, "26aadb2074c5f5fd9750f7059807f57900fd0f6e42c0617e4246895c34adb5ad"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 200704, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, "fa711be168574dc75c704335ffbe93992043972677fd8ca4ba3cde422b7b7943"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "36bba9522a6a928048f16d4ecf2a53a90a2405bc87ef5115acd7db9651a39dc7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "b66873fbb023103eb3ac381e4933e6487f3df9e76ab302d29b525ba705ae1813"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "0cbfab8706e00c124687232075391ce6e21402060229c1d97737b3aebdcb990b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "7a1a6d82ef9ed0204ce7a9f1372a25d454f10517b6bf06c31e495b026eb8d3f1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 186064, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "a98fe40bfe67a2f3a63e79e149d228400c15ebc68897d5129be163b6564315cb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 177360, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "543b9e7c0ec88cadc74fcf3f66b4a4e4b3a48417335188393485c62e46386902"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "9620815d77b7ad1fb1a5f3b1b6dc3700b5044128d45f08f46f28e999934c1c75"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "6e1122260670f46bd74b6241193fc1ee9ad15382f4e7cb08caf8092f3c1e1b05"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 200800, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, "59a9420c37a5534ac86dec1d1bb217a5a3b6fd07fdf8fa8463bf238bde81719d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 200704, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, "651cd06451231b0fbb42cfd897e00f8eed3f4ad83b8f8027c970e0b8dc70fcdc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 165104, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "f9287bddd35048c421323a74bbe32d882405f84fb2ee67fe86634a2ea93b36e3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 152720, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "aabd4bb9649f48673ee4157f6fc612b28707096bc63fba5428ff8f005f79ba0c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 150256, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "47ad38ed6bcd7411e01058e38038a3a020cc1af73fe85accc27e9e7a70419055"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 144016, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "7a6309c98c6ea3c7a93005867e4d266eea9ffa00436d3ea2e78b4d96cd1f8340"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, "c51518c2f8359f4ee6fb1b9e751399e009f4a2edc1c13a6492f6d4997368a532"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, "dbf43d1a83ed890a643de93552918f70051c8222a3fae5c773906935a6af228a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, "f1693f20024411ab42db9483b8d558915846bc7f44c0897fde80eab46ae422cb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, "e805756d60e8cf14e96121716d99151b6fcc8abe6379ae52d64d4fc520278715"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 85232, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, "6153245afed45383e0ab3cf37b48a247684ee10708ef2ffd11c57c9e11023260"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 85136, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, "9a9c544375a94eab84d2fda2a96a64cb4cab8df89243f6efc0a611e1c2f45cbc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "cfe36147875ed4724195515f97c414e579064f6c4c4d47a603408ea669042a95"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "2f050c443f68a44ef170cf5e493af29b303e3497417be3091e5cdfef66ceb365"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "e6a5d99ff970c9c33c4e7bd638fde684a0112241b577d8df9d1960b56324034b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "ac07a5c18959e92bdb95e18a3a7f91f3b9329a0d8b60bf8f639362abb2524f88"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 32, 3, 3, 128, 0, 2, true, false, false, "6c37839a06d819d9bc1bd229f5919e451a9c113d455a721c3597f11cc4a8aa7c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, "e3a8178b4957ec8a56221bad06d9dc8502f95a3e767d198ff083f97a736d90b6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, "8b82cccc3624617d1d3953999977f3a8521ca18a58c680ffdfbfb953ebdcbd23"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, "e6b7ef1891d8633685a35470dea2f8de1e149fe349ca32dd7ac80ede79bf9694"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 202832, 512, 2, 64, 3, 3, 128, 0, 2, true, false, false, "d7e3d7705b0ec4b95b1c25549f8b11b16fad12b0b2ddc4958052131a754da5fd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 167968, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, "a0e4021329290b41fa0982ab89a91b130c77d8347c42b3bd57364b7eae3b8ea2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 184432, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, "65981f3a4c4dee24cfacb26d74fe46b2cb7c267aaca8ca4e0c8b307119fdcf55"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 167952, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, "f236745ea418a50bcc1fa4139416290ff534ed757a9428e174c1b6dbd3b70afb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "1983b7e6fbae8ada258472a5458ee66c2b6d8b3e22b511202e8c1c52db12ba7c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "479087ea28d1bb2146463d5670980b8bf2b026843dad339be8658ecffe15be1d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "ab45f15945e66bb48fb7d9bcaf70c194787ef8308a0ce8d12328e0d5cf5d9ec2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "d878bff84d4e31aa0d0005a848a2b32747a750c795a0e79eda9227ba4021fc28"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "b3a7ca2fbff7ca9cdad9fe5b3e0201d3199794e16786b8ad92d5c8ec3ec2eb45"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "27e773d2cfc92b401c4c720bb26bcc4edcff2801027da8b77e8a91cdc5422118"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "8bab09baf17b083b246d1386f334c026ce687058941e756e4ed30bc0460475e5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "f3c8c7b2af2e3541236c6fa0beb0eeda0daede78a2794427dcad65201953e04d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "7c8948d0e0fb0fe960ea5e75095f8fc34a11d8011cd121aad0c9eb20e631bf2f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "2a27c9f327b8c30173d723ae4ec97e1d2bd96561658c95d932a28b76859483d8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "07e28172b719fabb93229f51348cd1b6730ccaae6471c070f05f3c02ac1e46d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "1a940e3f6d366c9a1619be66736af77c55c3f64058f04222d9ac34981c7b7d06"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "dc14bb4c475f955afa6423133e16c05b48ea5baea538fb67044cc6d14c1cf8af"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "0007149b93661145560942dab22b8f7a5c80b8d2800f490ac03f40869e49b4b0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "4a14d7048af482090efd8d2c328c05718be2ed2b95bc05fdc33763cc6d54f24a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "a4850cd5185c878475b164f65334138f1d5386fa0b6562e892bff80ccdd82fc8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "b5036ced0c0c256ad31f1b146ebee7df8aab200fe4be3096e402d084084e190b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "ca91742a9667d94e2fa25465f7f64dda3de1944de223b9d0730d86e4d5b71fd7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "33dfab9535bf4ce4e6612bab4d2d89fb649b3de7f545a0224b3b10cd57beddee"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "fbd1a15445474a11d04467a90788cf1f61df580a2d8770b290cf207edfb55cb9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "09f75a12c7d57ed9ada07cf1074842e8be1810ea2d43cd4913733ab0b744499d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "eff44ea82583c45ac99072cdb4a0674adac16e9a60b1e0361e864d369178dfe8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "42e14a6df99b350cbc721b04d0d8aa22154de23df464a1121a5fe93cfe6cd30a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "e0a3b8a85822b09d7310ff67ef0d6e5e3b570e74c24add741b03b8213949e06c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "34e56a0e0a752c640b507ad15c62914f95ba07bf250dd28260196863884c5c28"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 86032, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "2e1cd2638e57f0c9e6efd60e776d1266bc24d6cccd09e91b8c99206b2a551d30"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "554a0ac9a0ae8a523ae146f009973510a3eb3f7b4b5defe91d53f0d72c3e7cdc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "e80eec9cfe7b04a1c2c2bdc087633b46f02af06e037c430078f360c34898055a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "3e42845a38b17f7527bb026e89edd57206eb22079e6bdc59b06f3639b42ca60c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "543299bde7bacdf9f145d2239a56a638d41d09a023b3c8e207a96a0ba3cfe05f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197840, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "a65e55213cc3e6fa0ca3bb1f934c249ec6e83dbfc35bd6c76242e69e2f67b4aa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 192208, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "8614f3baf45cd121e3edd14a32bd2eeb916d5008dc5cb822e55097183bde0471"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "495e2bac15f380afb85cc5ff35aa95ac78bee1134dfb9cf602f7ad12e37daf99"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "08c3ae707bd18280359f231f10d0ff3d261208d133223e8627e00f2708409107"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 86128, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "4b572b039d74eb7a3e7afad1577bf5d5996f97612af877d13040275a87fbcbf9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 86032, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "bfdaabbd7bb5f294a254b2e4a1ddab62d775adbfc62de2f2a7800652ce8fd259"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 167152, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "eb104a983f794f1ea29314eed2a50211fc64eb2650c42c9eed2a9baa58221bff"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 162960, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "d623de145546679c1f4cd557b310c99f5a24bc81dfadebd146e5b9ca67e64e58"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 159472, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "b66d3c756b32591b7fdb5d98e906c01c7ef0753522ad755b4d9d6e4daf188e78"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 157328, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "ff391eb062ab796bb2636f2ee554c75d3c1aaeae53c09ea0884cf41dd6c9d563"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "028fe7a8b152d000dae181ba1db8dca99424abb701c4c0de7195f875037acb8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "709861e19c7eb3e22e89d79edf830460d1af46e752af46c4a08ee660e502f80e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "bc07558fbbe9e842e1d45beb2e96e722dca60a9ae4c373e6d3bcd4237f201f2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "80272770ebbdb894fd10c1109f227fab2136e581bf369588104e416117d93176"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "43f44388af1a0827d8b3fb99934735dc8d49e91b8ced32f3ac53233bd20011e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "4da81a6098eabd3491912a770dde4914f370fcc63d1c2b580512b7b16d61b615"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "d90da544d866c8d7b28c4a1a6c453fad1076f13ef456c2fc75c04fdd3e3360d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "89668882e2ee8a704589f8fff9c1ffef78eda622d0f11fe5ea785890f5fc8a53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "b544c6bc47504e41e3bb48191afd8942741ddf45652d4088d934f6b9690a79a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "4874cfb762daeb4940a0fd2a549edf267e06d7dcc468c193f735d57d4cf2faab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "40f7fee2be2f74f26721c9209324939b1bce8e5f261b8f70b636cccfd54bdd71"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "c81e50a9dca25cebfc6a6204d7c228119bcfe810e6cfd89586364f1963226d7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "cee488e716447b89a1e0fb9c2899340e07b5286a9a297636263ee043f20ff0d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "26643645e315dad994324b548172991a1a569f5f34d3bbc8d1e7c4dd5bae9b63"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "7b6fe74504db0d491791a57e03ee4cedd9d3f5ce0e73791d082083945f36b15e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "47632d0bdd3d7d4e2904846045e3974bf3b6cad21460db1cad44c971acedab35"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "486cff2b03f5d0479b5957db34a37a9d38d2cc74d9631cf7b6bd705ce2b69c05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "fce4c0601d6b9dc26e757d9a4f38e330336a56ebc70202064186b7091095fd56"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "531954c199a5b42daa5ea8f2a5d04071ce921c156774bc5313f33538b79a1551"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "8a9d75c53afa86ca90a4b5a8c64e2ce6b35e2ad6120d89a17ee303fa2a5c7097"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "b8d9b07cc2f3317f0fffe6490ca854347fd4cc63e19602f1b5bf5c5df44ca03b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "bb487f7c79f2e871af86d2c00f908763e2f06411e28165b93b7c0315d2be3bbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "8f9f47bfa86ad15533fb0667f1cdc14b784de55e200ccb32ac093ad61278989b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "5fb9d33f30d6867d3ba8ed2cc1ea79d6994b4559c7683496aadfc60a130298be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "e1d0daee159da68013a826e9a8a7ed99b95d86a468bff24b0eac38cdefc352c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "9bbde00ea89fdad47dafb82e3ed0b43e04e31bc66fcb43a21481c6a67fc919bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "30617c56e8e6606db716df1cdfdc8ca5ca512d304cfcd287ba6544baba901833"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "4b965b930993f6250734b20a6bd32ce9c77ff13bba33db53b7ace271e53722cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "cea56d9bc732f3cf557b546c09b884a78c2e92c979859c5f0f024f8a6886a4d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 131088, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "ee966e3b8a9b9a418c672fb29236f456f5e3b0d20a10b5cf34eb0b56d1a383aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "4d16b7185795a7dec450744d2c371f7d8c7191159d173de68260ca9a798bce80"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "cd4a83e7d8968b9a45aefcf351a9fc4dbdaa8f5007b4e072ff784f375a065a3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "70e2f2e4e088e357c06d03b9867eb2d7df9ffc6f2ea02b80dc64ec436bd9a239"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "58228cbf23c654aba481db2e97cba131a3b402835c84f00e49ab2d167166b98a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 201936, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "036994d43098ad1c4037090026ad4d5b03cf6193b4f9e3b8b6090922c5c818e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 198352, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "95bcd1767db908049dadc39033ab866c321847a76ec233fe27f203a9b2ac183e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "b8d20e2aea81a7c8d033608ef8b92f27934d2d13cd6551115e62290f1c4ac16a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "9aa93af5917b33e66b309eb30d5d1fd1c20c918cde7caf46fa17aab72740500d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 131184, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "140e19591dc1f700821b692759057c733c11131aa73a328b475c8f3cf9c86aaf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 131088, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "73025fab8e199abe397f56f419c6525b34ff0f54356a64eba169429bf82c9262"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 172272, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "b0c4c8c57341e0d15bb47dc9c7dbb69f7988a72fa717a3642cd8c43474bcbb06"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 168080, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "c5764019fbfd6233ee2c25a87d3725c824286d396b0d74f00c5f463bed5e9fd3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 166640, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "9a5465816df9016fe8e8599f756e992ba42b343e5de86485e8b83e7a64194f6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 164496, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "864d6f1f65e194f99c55b0369651d664d069852f7e6d436e8152c3628af6e329"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, "f68a83e323174e205996d120a29bf68ddb55c21c34c2812582fc30a4d68724ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, "0b9c2c1371eb627d9b0e728b865db6e0dc7de4d981a2ab3253a1de8278786397"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, "f2d07f1265eeec6ed6f1de8cfbf765aad0a152fcd8c58ae64f9b7dbfa02c4207"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, "b2a49489a7c9ea55b66a939fccbde02e847188bc75db5b5962b79fdaa6a3418b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 32, 0, 2, 16, 0, 2, true, false, false, "22362974e646b308dedb80225d44dd28ec68ef0a378a987c4ebd2c611f34ceb2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 32, 0, 2, 8, 0, 2, true, false, false, "65f792f51ac63db92b017c1ba04daaeecac970156f4e264c541ff066af705d3d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, "e6ece7f11c58af2dec660f9db44c59f107a3989fe19fbc0e68fca0a3c55c7f83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, "f678ba5c63aad9bb9768eea3d4226809f39ecd78e9fc5f61d0fa5ce1b9095260"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, "8e4dfe69333516521b9f9ccb305bb4df7045f77a25128d24c1bccc2249f2f7bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, "de28950bb4135188823ceb6d7191d481475b38e07959d7dbf6f3ed45d5d9777b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, "83587843d38b3398f5d03351ac8635fdd8b7aa32f6cabdb772f56ffe75449a12"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, "427417950d16c9f4a18e42cf66cd6a0eb00a941eeb178f5b440be2755f58ad1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, "bd6ff2484ec9fc922524b837dd001d5af728d5363bf5dfb4a75a8bdd2fbae87e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, "bd523589d6118e3e9e10d973f4240bc05f5d813ce28a5364e92bc93d3d7bbaa6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 64, 0, 2, 16, 0, 2, true, false, false, "37aebf88aa91958450d3729dead7cbc46e1462a148fc453722e767b2d8e9da1c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 64, 0, 2, 8, 0, 2, true, false, false, "c37532ae3c8b65df4628a89841dc87bbdfdc4dcd4995b5b456d436def189cb19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, "fe37db2d778ee4f0462fd6683ebf4098223c886c35445329ed0d43eff9ab7309"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, "c8906e469b6a88c8155d5d82d560660d63424ede7c2ed6e913f129398880096f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, "79a577f0b92a5cc10011334d3da10d97613a940d660060a74ba6faf518cf3bed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, "e6307131547fdbcc2bfbb1c60869483764b7dfe3cc6d14b3673f278f6515191c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, "14a18ff7191b3aaa4e3c2356888e988e3ff4dca50d65fcae563af4099fddb1e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, "24164b58cc6a0ed69f1d71000c394e0f82dad9e877256c5d340722ceeb6421da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, "1ed1ffd8448c89cb35672c779973dfd3cdcc5fb4eef594cfcd134f2683d9fa5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, "907915b37201b4296e95e3f51cb0cd10730897ddb17e07c97534f745916f9363"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 32, 2, 2, 16, 0, 2, true, false, false, "d99c75f63a67b397e77ed79752a20236c6a2550eff2bd5e50dd2fb544ac2cc3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 32, 2, 2, 8, 0, 2, true, false, false, "489d41d3b91afd088873de469e80bab145d91c94f76e664307308b959b1dc025"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, "ea8d3ddbc93f08186ed14b74699702cc1efaae0171e925700743bb048737c000"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, "8def3ba880b609ee2801e7b8e8b15797410ebc3b245b7684071335a9c03357d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, "385c1ea436b1653afb94eaac4859d16d0bf4c08741e4be379f4dcb2a5b3d3371"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 67600, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, "da7897a0eefaf7c84ac516a1f557e6661d46ee0e986b5627cf9ca6c696758b42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, "41e3071524ddcc241bc8c8d78d0a6c2aa74bb9bf25f3483310fcc3803b5e2917"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, "b75920da7ce878b65f6f61aef913e86fd8820444ce8baeef52befb3dba8625d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, "4ed7885ab23340200c8c33269f3e96f1aed4224b3c38c8ad3ae2f111772fa6f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, "ac2ed7bce287497f79fb6eed0c48be1799285a21385c320e5721d9f687f9a128"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 162000, 512, 2, 64, 2, 2, 16, 0, 2, true, false, false, "defaa79b942d991f9d196898cb79872006e75e58aaae5cb0c98e26f2c416337b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 158416, 512, 2, 64, 2, 2, 8, 0, 2, true, false, false, "242744454407647588ad387552bb1a5b3583bac596e8cf6cc336ff72483cb158"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, "5a407731c6f8328e457d0e85763e0b8f1361d0fd85653a6d5f8d2726536fc8f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, "a2dfd6f311b19b79df4e7b0e6b6a4f56e62dfe8f0f2a641e11b95a9efe1f3a2d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 67696, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, "22a59ba664003e0327c81398d3e1838a0002dc083a26b2352f045942edf3784d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 67600, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, "62035636576e7ec865516112e7949fe40d2cc17ec0934ebe5f8c08b39f0efc8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 129264, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, "9ffc7d07a7a7a13b62be63374462e6bab4767c754ead53aa67e031d82d609a04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 127120, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, "fd62f696a5905faf0a2b6381c3178981dadbc49189bdd5eb148ec033ad7f193a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 125168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, "7372a86586e25cc68b20cd59400a862e761811a8b7cf2022a19e12e29a314394"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 123536, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, "2d56b1ed12714374f96ab0178cc1b66da98c1e9a0a1b4b86f8c2263aa16d35da"}, #endif // EXCLUDE_SM_100 }; // clang-format on diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h index 2e9964b8b7..d95a72d130 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaKernels.h @@ -30,6 +30,7 @@ #include "cubin/kernelMetaInfo.h" #include "fmhaRunnerParams.h" #include "kernelParams.h" +#include "tensorrt_llm/kernels/multiHeadAttentionCommon.h" namespace tc = tensorrt_llm::common; @@ -40,6 +41,20 @@ namespace kernels //////////////////////////////////////////////////////////////////////////////////////////////////// +constexpr bool isSMCompatible(int gpuSM, int kernelSM) +{ + if (gpuSM == kSM_103) + { + return kernelSM == kSM_100f || kernelSM == kSM_103; + } + else if (gpuSM == kSM_100) + { + return kernelSM == kSM_100f || kernelSM == kSM_100; + } + + return gpuSM == kernelSM; +} + class TllmGenFmhaKernel { public: @@ -66,7 +81,7 @@ public: for (unsigned int i = 0; i < mKernelMetaCount; ++i) { auto const& kernelMeta = mKernelMeta[i]; - if (static_cast(kernelMeta.mSM) == mSM && kernelMeta.mDataTypeQ == mDtypeQ + if (isSMCompatible(mSM, kernelMeta.mSM) && kernelMeta.mDataTypeQ == mDtypeQ && kernelMeta.mDataTypeKv == mDtypeKv && kernelMeta.mDataTypeO == mDtypeOut) { // Load CUmodules @@ -618,7 +633,7 @@ inline TllmGenFmhaKernel const* getTllmFmhaKernels( Data_type dtypeQ, Data_type dtypeKv, Data_type dtypeOut, unsigned int sm) { -#ifndef EXCLUDE_SM_100 +#if !defined(EXCLUDE_SM_100) || !defined(EXCLUDE_SM_103) return TllmFmhaKernelFactory::Get().getKernels(sTllmGenFmhaKernelMetaInfos, sizeof(sTllmGenFmhaKernelMetaInfos) / sizeof(sTllmGenFmhaKernelMetaInfos[0]), dtypeQ, dtypeKv, dtypeOut, sm); #else diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaRunner.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaRunner.cpp index 85effad9b7..eca8d18d15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaRunner.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/fmhaRunner.cpp @@ -35,10 +35,6 @@ TllmGenFmhaRunner::TllmGenFmhaRunner(Data_type dtypeQ, Data_type dtypeKv, Data_t , mDtypeOut(dtypeOut) { TLLM_CHECK_WITH_INFO(mSM == kSM_100 || mSM == kSM_103, "Unsupported architecture"); - if (mSM == kSM_103) - { - mSM = kSM_100; // Currently we use same kernel for SM100 and SM103 - } TLLM_CHECK_WITH_INFO( mDtypeQ == DATA_TYPE_E4M3 || mDtypeQ == DATA_TYPE_FP16 || mDtypeQ == DATA_TYPE_BF16, "Unsupported Q data type"); TLLM_CHECK_WITH_INFO(mDtypeKv == DATA_TYPE_E2M1 || mDtypeKv == DATA_TYPE_E4M3 || mDtypeKv == DATA_TYPE_FP16 diff --git a/requirements.txt b/requirements.txt index ffaa379816..0a574d5333 100644 --- a/requirements.txt +++ b/requirements.txt @@ -70,7 +70,7 @@ ninja etcd3 blake3 soundfile -triton>=3.3.1,<=3.4.0; platform_machine == "x86_64" +triton>=3.3.1,<3.4.0; platform_machine == "x86_64" tiktoken blobfile openai-harmony==0.0.4 diff --git a/tests/integration/test_lists/test-db/l0_dgx_b300.yml b/tests/integration/test_lists/test-db/l0_dgx_b300.yml index a6ecd6bf74..81547dca94 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_b300.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_b300.yml @@ -14,4 +14,63 @@ l0_dgx_b300: stage: post_merge backend: pytorch tests: + - unittest/_torch/attention + - unittest/_torch/thop + - unittest/_torch/executor + - unittest/_torch/modules + - unittest/_torch/modeling -k "modeling_llama" + - unittest/_torch/modeling -k "modeling_mixtral" + - unittest/_torch/modeling -k "modeling_deepseek" + - unittest/_torch/modeling -k "modeling_gpt_oss" - unittest/_torch/multi_gpu_modeling -k "deepseek" + - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEP] + - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEPLowLatency] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[pp4-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=False-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp2pp2-mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp2pp2-mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp2pp2-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_trtllm-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_trtllm-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_cutlass-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8[tp4-cuda_graph=True] + - accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp4-cuda_graph=True] + - accuracy/test_disaggregated_serving.py::TestQwen3_30B_A3B::test_mixed_ctx_gen_model[ctxpp2gentp2] + - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx[DeepSeek-V3-Lite-fp8] + - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass] + - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm] + - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton] + - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4] + - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_nixl[DeepSeek-V3-Lite-fp8] + - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16] + - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] + - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] + - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] + - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend + - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend + # ------------- AutoDeploy tests --------------- diff --git a/tests/integration/test_lists/test-db/l0_gb300_multi_gpus.yml b/tests/integration/test_lists/test-db/l0_gb300_multi_gpus.yml index a4b49268f3..f40a44d75d 100644 --- a/tests/integration/test_lists/test-db/l0_gb300_multi_gpus.yml +++ b/tests/integration/test_lists/test-db/l0_gb300_multi_gpus.yml @@ -14,4 +14,46 @@ l0_gb300_multi_gpus: stage: post_merge backend: pytorch tests: + - unittest/_torch/attention + - unittest/_torch/thop + - unittest/_torch/executor + - unittest/_torch/modules + - unittest/_torch/modeling -k "modeling_llama" + - unittest/_torch/modeling -k "modeling_mixtral" + - unittest/_torch/modeling -k "modeling_deepseek" + - unittest/_torch/modeling -k "modeling_gpt_oss" - unittest/_torch/multi_gpu_modeling -k "deepseek" + - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEP] + - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEPLowLatency] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=False-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] + - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] TIMEOUT (90) + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[pp4-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=TRTLLM-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] + - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]